50 files changed, 845 insertions, 863 deletions
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index df7a5e0a9..9be5b2780 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
 include(DownloadExternals)
 
+# xbyak
+if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
+    add_library(xbyak INTERFACE)
+    target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+    target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
+endif()
+
 # Catch
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)
@@ -75,11 +82,3 @@ if (ENABLE_WEB_SERVICE)
     target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
     target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
 endif()
-
-if (NOT TARGET xbyak)
-    if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
-        add_library(xbyak INTERFACE)
-        target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
-        target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
-    endif()
-endif()
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..c4c5199b1 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
 
 u64 Process::GetTotalPhysicalMemoryAvailable() const {
     const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
-                       page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};
+                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
+                       main_thread_stack_size};
 
     if (capacity < memory_usage_capacity) {
         return capacity;
@@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
 }
 
 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
+    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
+           GetSystemResourceSize();
 }
 
 u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
     const std::size_t index{ResourceTypeToIndex(resource)};
 
     s64 new_value = current[index] + amount;
-    while (new_value > limit[index] && available[index] + amount <= limit[index]) {
+    if (new_value > limit[index] && available[index] + amount <= limit[index]) {
         // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
         new_value = current[index] + amount;
-
-        if (timeout >= 0) {
-            break;
-        }
     }
 
     if (new_value <= limit[index]) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5c..0d913334e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
     case IoctlCommand::IocGetCharacteristicsCommand:
         return GetCharacteristics(input, output, output2, version);
     case IoctlCommand::IocGetTPCMasksCommand:
-        return GetTPCMasks(input, output);
+        return GetTPCMasks(input, output, output2, version);
     case IoctlCommand::IocGetActiveSlotMaskCommand:
         return GetActiveSlotMask(input, output);
     case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
     return 0;
 }
 
-u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
+                                 std::vector<u8>& output2, IoctlVersion version) {
     IoctlGpuGetTpcMasksArgs params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
-             params.mask_buf_addr);
-    // TODO(ogniK): Confirm value on hardware
-    if (params.mask_buf_size)
-        params.tpc_mask_size = 4 * 1; // 4 * num_gpc
-    else
-        params.tpc_mask_size = 0;
-    std::memcpy(output.data(), &params, sizeof(params));
+    LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
+    if (params.mask_buffer_size != 0) {
+        params.tcp_mask = 3;
+    }
+
+    if (version == IoctlVersion::Version3) {
+        std::memcpy(output.data(), input.data(), output.size());
+        std::memcpy(output2.data(), &params.tcp_mask, output2.size());
+    } else {
+        std::memcpy(output.data(), &params, output.size());
+    }
+
     return 0;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 07b644ec5..ef60f72ce 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,16 +92,11 @@ private:
                   "IoctlCharacteristics is incorrect size");
 
     struct IoctlGpuGetTpcMasksArgs {
-        /// [in]  TPC mask buffer size reserved by userspace. Should be at least
-        /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
-        /// [out] full kernel buffer size
-        u32_le mask_buf_size;
-        u32_le reserved;
-
-        /// [in]  pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
-        /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
-        u64_le mask_buf_addr;
-        u64_le tpc_mask_size; // Nintendo add this?
+        u32_le mask_buffer_size{};
+        INSERT_PADDING_WORDS(1);
+        u64_le mask_buffer_address{};
+        u32_le tcp_mask{};
+        INSERT_PADDING_WORDS(1);
     };
     static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
                   "IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -166,7 +161,8 @@ private:
 
     u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
                            std::vector<u8>& output2, IoctlVersion version);
-    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
+                    IoctlVersion version);
     u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
     u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
     u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
diff --git a/src/core/settings.h b/src/core/settings.h
index 36cd66fd4..33e1e06cd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
     bool renderer_debug;
     int vulkan_device;
 
-    float resolution_factor;
+    u16 resolution_factor{1};
     int aspect_ratio;
     int max_anisotropy;
     bool use_frame_limit;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2af713af2..099bb446e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -49,8 +49,6 @@ add_library(video_core STATIC
     query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
-    rasterizer_cache.cpp
-    rasterizer_cache.h
     rasterizer_interface.h
     renderer_base.cpp
     renderer_base.h
@@ -95,6 +93,7 @@ add_library(video_core STATIC
     renderer_opengl/utils.h
     sampler_cache.cpp
     sampler_cache.h
+    shader_cache.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const VAddr start, const VAddr end) const {
+    bool Overlaps(VAddr start, VAddr end) const {
         return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+    bool IsInside(VAddr other_start, VAddr other_end) const {
         return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    std::size_t GetOffset(const VAddr in_addr) {
+    std::size_t Offset(VAddr in_addr) const {
         return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    VAddr GetCpuAddr() const {
+    VAddr CpuAddr() const {
         return cpu_addr;
     }
 
-    VAddr GetCpuAddrEnd() const {
+    VAddr CpuAddrEnd() const {
         return cpu_addr_end;
     }
 
-    void SetCpuAddr(const VAddr new_addr) {
+    void SetCpuAddr(VAddr new_addr) {
         cpu_addr = new_addr;
         cpu_addr_end = new_addr + size;
     }
 
-    std::size_t GetSize() const {
+    std::size_t Size() const {
         return size;
     }
 
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
+    u64 Epoch() const {
+        return epoch;
     }
 
-    u64 GetEpoch() {
-        return epoch;
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
     }
 
 protected:
-    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
-        SetCpuAddr(cpu_addr);
+    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+        SetCpuAddr(cpu_addr_);
     }
-    ~BufferBlock() = default;
 
 private:
     VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b88fce2cd..308d8b55f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,12 +30,16 @@
 
 namespace VideoCommon {
 
-template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
+template <typename Buffer, typename BufferType, typename StreamBuffer>
 class BufferCache {
     using IntervalSet = boost::icl::interval_set<VAddr>;
     using IntervalType = typename IntervalSet::interval_type;
     using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
 
+    static constexpr u64 WRITE_PAGE_BIT = 11;
+    static constexpr u64 BLOCK_PAGE_BITS = 21;
+    static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+
 public:
     using BufferInfo = std::pair<BufferType, u64>;
 
@@ -82,7 +86,7 @@ public:
             }
         }
 
-        OwnerBuffer block = GetBlock(cpu_addr, size);
+        Buffer* const block = GetBlock(cpu_addr, size);
         MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (!map) {
             return {GetEmptyBuffer(size), 0};
@@ -98,7 +102,7 @@ public:
             }
         }
 
-        return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
+        return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
     }
 
     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -110,31 +114,37 @@ public:
         });
     }
 
-    void Map(std::size_t max_size) {
+    /// Prepares the buffer cache for data uploading
+    /// @param max_size Maximum number of bytes that will be uploaded
+    /// @return True when a stream buffer invalidation was required, false otherwise
+    bool Map(std::size_t max_size) {
         std::lock_guard lock{mutex};
 
+        bool invalidated;
         std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
         buffer_offset = buffer_offset_base;
+
+        return invalidated;
     }
 
-    /// Finishes the upload stream, returns true on bindings invalidation.
-    bool Unmap() {
+    /// Finishes the upload stream
+    void Unmap() {
         std::lock_guard lock{mutex};
-
         stream_buffer->Unmap(buffer_offset - buffer_offset_base);
-        return std::exchange(invalidated, false);
     }
 
+    /// Function called at the end of each frame, intended for deferred operations
     void TickFrame() {
         ++epoch;
+
         while (!pending_destruction.empty()) {
             // Delay at least 4 frames before destruction.
             // This is due to triple buffering happening on some drivers.
             static constexpr u64 epochs_to_destroy = 5;
-            if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
+            if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
                 break;
             }
-            pending_destruction.pop_front();
+            pending_destruction.pop();
         }
     }
 
@@ -249,23 +259,21 @@ public:
 
 protected:
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                         std::unique_ptr<StreamBuffer> stream_buffer)
-        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
-          stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+                         std::unique_ptr<StreamBuffer> stream_buffer_)
+        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
+          stream_buffer_handle{stream_buffer->Handle()} {}
 
     ~BufferCache() = default;
 
-    virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
+    virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
-    virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
-
-    virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
+    virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
 
-    virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
+    virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                    u8* data) = 0;
 
-    virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
+    virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                            std::size_t dst_offset, std::size_t size) = 0;
 
     virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
@@ -321,7 +329,7 @@ protected:
     }
 
 private:
-    MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
+    MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
                             std::size_t size) {
         const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
@@ -329,11 +337,11 @@ private:
             const VAddr cpu_addr_end = cpu_addr + size;
             if (memory_manager.IsGranularRange(gpu_addr, size)) {
                 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
-                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+                UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr);
             } else {
                 staging_buffer.resize(size);
                 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+                UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data());
             }
             return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
         }
@@ -376,7 +384,7 @@ private:
         return map;
     }
 
-    void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
+    void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
                      const VectorMapInterval& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -386,13 +394,13 @@ private:
             interval_set.subtract(subtract);
         }
         for (auto& interval : interval_set) {
-            std::size_t size = interval.upper() - interval.lower();
-            if (size > 0) {
-                staging_buffer.resize(size);
-                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
-                UploadBlockData(block, block->GetOffset(interval.lower()), size,
-                                staging_buffer.data());
+            const std::size_t size = interval.upper() - interval.lower();
+            if (size == 0) {
+                continue;
             }
+            staging_buffer.resize(size);
+            system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+            UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data());
         }
     }
 
@@ -422,10 +430,14 @@ private:
     }
 
     void FlushMap(MapInterval* map) {
+        const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
+        ASSERT_OR_EXECUTE(it != blocks.end(), return;);
+
+        std::shared_ptr<Buffer> block = it->second;
+
         const std::size_t size = map->end - map->start;
-        OwnerBuffer block = blocks[map->start >> block_page_bits];
         staging_buffer.resize(size);
-        DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
+        DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data());
         system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
@@ -448,97 +460,89 @@ private:
         buffer_offset = offset_aligned;
     }
 
-    OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
-        const std::size_t old_size = buffer->GetSize();
-        const std::size_t new_size = old_size + block_page_size;
-        const VAddr cpu_addr = buffer->GetCpuAddr();
-        OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
-        CopyBlock(buffer, new_buffer, 0, 0, old_size);
-        buffer->SetEpoch(epoch);
-        pending_destruction.push_back(buffer);
+    std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
+        const std::size_t old_size = buffer->Size();
+        const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
+        const VAddr cpu_addr = buffer->CpuAddr();
+        std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
+        CopyBlock(*buffer, *new_buffer, 0, 0, old_size);
+        QueueDestruction(std::move(buffer));
+
         const VAddr cpu_addr_end = cpu_addr + new_size - 1;
-        u64 page_start = cpu_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
-            blocks[page_start] = new_buffer;
-            ++page_start;
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+            blocks.insert_or_assign(page_start, new_buffer);
         }
+
         return new_buffer;
     }
 
-    OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
-        const std::size_t size_1 = first->GetSize();
-        const std::size_t size_2 = second->GetSize();
-        const VAddr first_addr = first->GetCpuAddr();
-        const VAddr second_addr = second->GetCpuAddr();
+    std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
+                                        std::shared_ptr<Buffer> second) {
+        const std::size_t size_1 = first->Size();
+        const std::size_t size_2 = second->Size();
+        const VAddr first_addr = first->CpuAddr();
+        const VAddr second_addr = second->CpuAddr();
         const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
-        OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
-        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
-        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
-        first->SetEpoch(epoch);
-        second->SetEpoch(epoch);
-        pending_destruction.push_back(first);
-        pending_destruction.push_back(second);
+
+        std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
+        CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1);
+        CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2);
+        QueueDestruction(std::move(first));
+        QueueDestruction(std::move(second));
+
         const VAddr cpu_addr_end = new_addr + new_size - 1;
-        u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
-            blocks[page_start] = new_buffer;
-            ++page_start;
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+            blocks.insert_or_assign(page_start, new_buffer);
         }
         return new_buffer;
     }
 
-    OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
-        OwnerBuffer found;
+    Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
+        std::shared_ptr<Buffer> found;
+
         const VAddr cpu_addr_end = cpu_addr + size - 1;
-        u64 page_start = cpu_addr >> block_page_bits;
-        const u64 page_end = cpu_addr_end >> block_page_bits;
-        while (page_start <= page_end) {
+        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
-                } else {
-                    const VAddr start_addr = (page_start << block_page_bits);
-                    found = CreateBlock(start_addr, block_page_size);
-                    blocks[page_start] = found;
-                }
-            } else {
-                if (found) {
-                    if (found == it->second) {
-                        ++page_start;
-                        continue;
-                    }
-                    found = MergeBlocks(found, it->second);
-                } else {
-                    found = it->second;
+                    continue;
                 }
+                const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
+                found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
+                blocks.insert_or_assign(page_start, found);
+                continue;
+            }
+            if (!found) {
+                found = it->second;
+                continue;
+            }
+            if (found != it->second) {
+                found = MergeBlocks(std::move(found), it->second);
             }
-            ++page_start;
         }
-        return found;
+        return found.get();
     }
 
-    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
+    void MarkRegionAsWritten(VAddr start, VAddr end) {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
             auto it = written_pages.find(page_start);
             if (it != written_pages.end()) {
                 it->second = it->second + 1;
             } else {
-                written_pages[page_start] = 1;
+                written_pages.insert_or_assign(page_start, 1);
             }
-            ++page_start;
         }
     }
 
-    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
+    void UnmarkRegionAsWritten(VAddr start, VAddr end) {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
             auto it = written_pages.find(page_start);
             if (it != written_pages.end()) {
                 if (it->second > 1) {
@@ -547,22 +551,24 @@ private:
                     written_pages.erase(it);
                 }
             }
-            ++page_start;
         }
     }
 
-    bool IsRegionWritten(const VAddr start, const VAddr end) const {
-        u64 page_start = start >> write_page_bit;
-        const u64 page_end = end >> write_page_bit;
-        while (page_start <= page_end) {
+    bool IsRegionWritten(VAddr start, VAddr end) const {
+        const u64 page_end = end >> WRITE_PAGE_BIT;
+        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
             if (written_pages.count(page_start) > 0) {
                 return true;
             }
-            ++page_start;
         }
         return false;
     }
 
+    void QueueDestruction(std::shared_ptr<Buffer> buffer) {
+        buffer->SetEpoch(epoch);
+        pending_destruction.push(std::move(buffer));
+    }
+
     void MarkForAsyncFlush(MapInterval* map) {
         if (!uncommitted_flushes) {
             uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
@@ -574,9 +580,7 @@ private:
     Core::System& system;
 
     std::unique_ptr<StreamBuffer> stream_buffer;
-    BufferType stream_buffer_handle{};
-
-    bool invalidated = false;
+    BufferType stream_buffer_handle;
 
     u8* buffer_ptr = nullptr;
     u64 buffer_offset = 0;
@@ -586,18 +590,15 @@ private:
     boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
         mapped_addresses;
 
-    static constexpr u64 write_page_bit = 11;
     std::unordered_map<u64, u32> written_pages;
+    std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
 
-    static constexpr u64 block_page_bits = 21;
-    static constexpr u64 block_page_size = 1ULL << block_page_bits;
-    std::unordered_map<u64, OwnerBuffer> blocks;
-
-    std::list<OwnerBuffer> pending_destruction;
+    std::queue<std::shared_ptr<Buffer>> pending_destruction;
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
     std::vector<u8> staging_buffer;
+
     std::list<MapInterval*> marked_for_unregister;
 
     std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
     virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                     u64 offset) const = 0;
+    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
     virtual u32 GetBoundBuffer() const = 0;
 
     virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
     ASSERT(stage == ShaderType::Compute);
     const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
     const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
 
-    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
+    const Texture::TextureHandle tex_handle{handle};
     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
     SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                             u64 offset) const override;
 
+    SamplerDescriptor AccessSampler(u32 handle) const override;
+
     u32 GetBoundBuffer() const override {
         return regs.tex_cb_index;
     }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e46b153f9..ea3c8a963 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
     const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& tex_info_buffer = shader.const_buffers[const_buffer];
     const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
+    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
 
-    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
+    const Texture::TextureHandle tex_handle{handle};
     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
     SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 79fc9bbea..d5fe25065 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1404,6 +1404,8 @@ public:
     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                             u64 offset) const override;
 
+    SamplerDescriptor AccessSampler(u32 handle) const override;
+
     u32 GetBoundBuffer() const override {
         return regs.tex_cb_index;
     }
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 21ee157cf..71f738b9a 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -85,8 +85,8 @@ private:
     std::optional<Macro::Opcode> next_opcode{};
     ProgramType program{nullptr};
 
-    std::array<Xbyak::Label, MAX_CODE_SIZE> labels{};
-    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{};
+    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
+    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
     Xbyak::Label end_of_code{};
 
     bool is_delay_slot{};
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/rasterizer_cache.h"
-
-RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <mutex>
-#include <set>
-#include <unordered_map>
-
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range_core.hpp>
-
-#include "common/common_types.h"
-#include "core/settings.h"
-#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
-
-class RasterizerCacheObject {
-public:
-    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
-
-    virtual ~RasterizerCacheObject();
-
-    VAddr GetCpuAddr() const {
-        return cpu_addr;
-    }
-
-    /// Gets the size of the shader in guest memory, required for cache management
-    virtual std::size_t GetSizeInBytes() const = 0;
-
-    /// Sets whether the cached object should be considered registered
-    void SetIsRegistered(bool registered) {
-        is_registered = registered;
-    }
-
-    /// Returns true if the cached object is registered
-    bool IsRegistered() const {
-        return is_registered;
-    }
-
-    /// Returns true if the cached object is dirty
-    bool IsDirty() const {
-        return is_dirty;
-    }
-
-    /// Returns ticks from when this cached object was last modified
-    u64 GetLastModifiedTicks() const {
-        return last_modified_ticks;
-    }
-
-    /// Marks an object as recently modified, used to specify whether it is clean or dirty
-    template <class T>
-    void MarkAsModified(bool dirty, T& cache) {
-        is_dirty = dirty;
-        last_modified_ticks = cache.GetModifiedTicks();
-    }
-
-    void SetMemoryMarked(bool is_memory_marked_) {
-        is_memory_marked = is_memory_marked_;
-    }
-
-    bool IsMemoryMarked() const {
-        return is_memory_marked;
-    }
-
-    void SetSyncPending(bool is_sync_pending_) {
-        is_sync_pending = is_sync_pending_;
-    }
-
-    bool IsSyncPending() const {
-        return is_sync_pending;
-    }
-
-private:
-    bool is_registered{};      ///< Whether the object is currently registered with the cache
-    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
-    bool is_memory_marked{};   ///< Whether the object is marking rasterizer memory.
-    bool is_sync_pending{};    ///< Whether the object is pending deletion.
-    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
-    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space
-};
-
-template <class T>
-class RasterizerCache : NonCopyable {
-    friend class RasterizerCacheObject;
-
-public:
-    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
-
-    /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
-        for (auto& object : objects) {
-            FlushObject(object);
-        }
-    }
-
-    /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
-        std::lock_guard lock{mutex};
-
-        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
-        for (auto& object : objects) {
-            if (!object->IsRegistered()) {
-                // Skip duplicates
-                continue;
-            }
-            Unregister(object);
-        }
-    }
-
-    void OnCPUWrite(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
-            if (object->IsRegistered()) {
-                UnmarkMemory(object);
-                object->SetSyncPending(true);
-                marked_for_unregister.emplace_back(object);
-            }
-        }
-    }
-
-    void SyncGuestHost() {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : marked_for_unregister) {
-            if (object->IsRegistered()) {
-                object->SetSyncPending(false);
-                Unregister(object);
-            }
-        }
-        marked_for_unregister.clear();
-    }
-
-    /// Invalidates everything in the cache
-    void InvalidateAll() {
-        std::lock_guard lock{mutex};
-
-        while (interval_cache.begin() != interval_cache.end()) {
-            Unregister(*interval_cache.begin()->second.begin());
-        }
-    }
-
-protected:
-    /// Tries to get an object from the cache with the specified cache address
-    T TryGet(VAddr addr) const {
-        const auto iter = map_cache.find(addr);
-        if (iter != map_cache.end())
-            return iter->second;
-        return nullptr;
-    }
-
-    /// Register an object into the cache
-    virtual void Register(const T& object) {
-        std::lock_guard lock{mutex};
-
-        object->SetIsRegistered(true);
-        interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetCpuAddr(), object});
-        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
-        object->SetMemoryMarked(true);
-    }
-
-    /// Unregisters an object from the cache
-    virtual void Unregister(const T& object) {
-        std::lock_guard lock{mutex};
-
-        UnmarkMemory(object);
-        object->SetIsRegistered(false);
-        if (object->IsSyncPending()) {
-            marked_for_unregister.remove(object);
-            object->SetSyncPending(false);
-        }
-        const VAddr addr = object->GetCpuAddr();
-        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(addr);
-    }
-
-    void UnmarkMemory(const T& object) {
-        if (!object->IsMemoryMarked()) {
-            return;
-        }
-        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        object->SetMemoryMarked(false);
-    }
-
-    /// Returns a ticks counter used for tracking when cached objects were last modified
-    u64 GetModifiedTicks() {
-        std::lock_guard lock{mutex};
-
-        return ++modified_ticks;
-    }
-
-    virtual void FlushObjectInner(const T& object) = 0;
-
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        std::lock_guard lock{mutex};
-
-        if (!object->IsDirty()) {
-            return;
-        }
-        FlushObjectInner(object);
-        object->MarkAsModified(false, *this);
-    }
-
-    std::recursive_mutex mutex;
-
-private:
-    /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
-        if (size == 0) {
-            return {};
-        }
-
-        std::vector<T> objects;
-        const ObjectInterval interval{addr, addr + size};
-        for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
-            for (auto& cached_object : pair.second) {
-                if (!cached_object) {
-                    continue;
-                }
-                objects.push_back(cached_object);
-            }
-        }
-
-        std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
-            return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
-        });
-
-        return objects;
-    }
-
-    using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<VAddr, T>;
-    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
-    using ObjectInterval = typename IntervalCache::interval_type;
-
-    static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetCpuAddr(),
-                                          object->GetCpuAddr() + object->GetSizeInBytes());
-    }
-
-    ObjectCache map_cache;
-    IntervalCache interval_cache; ///< Cache of objects
-    u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
-    VideoCore::RasterizerInterface& rasterizer;
-    std::list<T> marked_for_unregister;
-};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 9964ea894..ad0577a4f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 
-CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
-    : VideoCommon::BufferBlock{cpu_addr, size} {
+Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} {
     gl_buffer.Create();
     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
 }
 
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
 
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
                                const Device& device, std::size_t stream_size)
@@ -48,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
     glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 }
 
-Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
-}
-
-GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
-    return buffer->GetHandle();
+std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<Buffer>(cpu_addr, size);
 }
 
 GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
@@ -62,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
 
 void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                      const u8* data) {
-    glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
+    glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
                          static_cast<GLsizeiptr>(size), data);
 }
 
@@ -70,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
                                        u8* data) {
     MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
     glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
-    glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
+    glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
                             static_cast<GLsizeiptr>(size), data);
 }
 
 void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                                std::size_t dst_offset, std::size_t size) {
-    glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
+    glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
                              static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
 }
 
 OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
                                                              std::size_t size) {
     DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
-    const GLuint& cbuf = cbufs[cbuf_cursor++];
+    const GLuint cbuf = cbufs[cbuf_cursor++];
     glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
     return {cbuf, 0};
 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..a49aaf9c4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
@@ -24,17 +23,12 @@ class Device;
 class OGLStreamBuffer;
 class RasterizerOpenGL;
 
-class CachedBufferBlock;
-
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-
-class CachedBufferBlock : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBlock {
 public:
-    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
-    ~CachedBufferBlock();
+    explicit Buffer(VAddr cpu_addr, const std::size_t size);
+    ~Buffer();
 
-    GLuint GetHandle() const {
+    GLuint Handle() const {
         return gl_buffer.handle;
     }
 
@@ -42,6 +36,7 @@ private:
     OGLBuffer gl_buffer;
 };
 
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
 class OGLBufferCache final : public GenericBufferCache {
 public:
     explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -55,9 +50,7 @@ public:
     }
 
 protected:
-    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
-    GLuint ToHandle(const Buffer& buffer) override;
+    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                          const u8* data) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f802fd384..2d6c11320 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/shader_cache.h"
 
 namespace OpenGL {
 
@@ -65,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
 template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
                                                ShaderType shader_type, std::size_t index = 0) {
+    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+        if (entry.is_separated) {
+            const u32 buffer_1 = entry.buffer;
+            const u32 buffer_2 = entry.secondary_buffer;
+            const u32 offset_1 = entry.offset;
+            const u32 offset_2 = entry.secondary_offset;
+            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
+            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
+            return engine.GetTextureInfo(handle_1 | handle_2);
+        }
+    }
     if (entry.is_bindless) {
-        const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
-        return engine.GetTextureInfo(tex_handle);
+        const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+        return engine.GetTextureInfo(handle);
     }
+
     const auto& gpu_profile = engine.AccessGuestDriverProfile();
     const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
     if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -310,7 +323,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             continue;
         }
 
-        Shader shader{shader_cache.GetStageProgram(program)};
+        Shader* const shader = shader_cache.GetStageProgram(program);
 
         if (device.UseAssemblyShaders()) {
             // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -604,7 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
                    (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
 
     // Prepare the vertex array.
-    buffer_cache.Map(buffer_size);
+    const bool invalidated = buffer_cache.Map(buffer_size);
+
+    if (invalidated) {
+        // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
+        auto& dirty = gpu.dirty.flags;
+        dirty[Dirty::VertexBuffers] = true;
+        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+            dirty[index] = true;
+        }
+    }
 
     // Prepare vertex array format.
     SetupVertexFormat();
@@ -870,7 +892,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     return true;
 }
 
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
     static constexpr std::array PARAMETER_LUT = {
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -900,7 +922,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
     }
 }
 
-void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     const auto& entries = kernel->GetEntries();
@@ -969,7 +991,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
     }
 }
 
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -984,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
     }
 }
 
-void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -1007,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
                       static_cast<GLsizeiptr>(size));
 }
 
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -1020,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
     }
 }
 
-void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
@@ -1049,7 +1071,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
     }
 }
 
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).image;
     for (const auto& entry : shader->GetEntries().images) {
@@ -1059,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
     }
 }
 
-void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : shader->GetEntries().images) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7abc8fdbd..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_accelerated.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
@@ -100,10 +99,10 @@ private:
     void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
 
     /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
+    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
 
     /// Configures the current constbuffers to use for the kernel invocation.
-    void SetupComputeConstBuffers(const Shader& kernel);
+    void SetupComputeConstBuffers(Shader* kernel);
 
     /// Configures a constant buffer.
     void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +110,30 @@ private:
                           std::size_t unified_offset);
 
     /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
+    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
 
     /// Configures the current global memory entries to use for the kernel invocation.
-    void SetupComputeGlobalMemory(const Shader& kernel);
+    void SetupComputeGlobalMemory(Shader* kernel);
 
     /// Configures a constant buffer.
     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                            std::size_t size);
 
     /// Configures the current textures to use for the draw command.
-    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
+    void SetupDrawTextures(std::size_t stage_index, Shader* shader);
 
     /// Configures the textures used in a compute shader.
-    void SetupComputeTextures(const Shader& kernel);
+    void SetupComputeTextures(Shader* kernel);
 
     /// Configures a texture.
     void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                       const SamplerEntry& entry);
 
     /// Configures images in a graphics shader.
-    void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+    void SetupDrawImages(std::size_t stage_index, Shader* shader);
 
     /// Configures images in a compute shader.
-    void SetupComputeImages(const Shader& shader);
+    void SetupComputeImages(Shader* shader);
 
     /// Configures an image.
     void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f539a05e1..46e780a06 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -30,6 +30,7 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace OpenGL {
 
@@ -196,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
-                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                           ShaderEntries entries, ProgramSharedPtr program_)
-    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
-      size_in_bytes{size_in_bytes}, program{std::move(program_)} {
-    // Assign either the assembly program or source program. We can't have both.
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
+               ProgramSharedPtr program_)
+    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
     handle = program->assembly_program.handle;
     if (handle == 0) {
         handle = program->source_program.handle;
@@ -209,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
     ASSERT(handle != 0);
 }
 
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
 
-GLuint CachedShader::GetHandle() const {
+GLuint Shader::GetHandle() const {
     DEBUG_ASSERT(registry->IsConsistent());
     return handle;
 }
 
-Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
-                                           Maxwell::ShaderProgram program_type, ProgramCode code,
-                                           ProgramCode code_b) {
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
+                                                      Maxwell::ShaderProgram program_type,
+                                                      ProgramCode code, ProgramCode code_b) {
     const auto shader_type = GetShaderType(program_type);
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
@@ -243,12 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
-                         MakeEntries(params.device, ir, shader_type), std::move(program)));
+    return std::unique_ptr<Shader>(new Shader(
+        std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
 }
 
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
+std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
+                                                       ProgramCode code) {
     const std::size_t size_in_bytes = code.size() * sizeof(u64);
 
     auto& engine = params.system.GPU().KeplerCompute();
@@ -268,23 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
-                         MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
+    return std::unique_ptr<Shader>(new Shader(std::move(registry),
+                                              MakeEntries(params.device, ir, ShaderType::Compute),
+                                              std::move(program)));
 }
 
-Shader CachedShader::CreateFromCache(const ShaderParameters& params,
-                                     const PrecompiledShader& precompiled_shader,
-                                     std::size_t size_in_bytes) {
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
-                         precompiled_shader.entries, precompiled_shader.program));
+std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
+                                                const PrecompiledShader& precompiled_shader) {
+    return std::unique_ptr<Shader>(new Shader(
+        precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
 }
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                      Core::Frontend::EmuWindow& emu_window, const Device& device)
-    : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
-      disk_cache{system} {}
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
+      emu_window{emu_window}, device{device}, disk_cache{system} {}
+
+ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
 
 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
@@ -438,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
     return program;
 }
 
-Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
         return last_shaders[static_cast<std::size_t>(program)];
     }
@@ -448,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     // Look up shader in the cache based on address
     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
-    if (shader) {
+    if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
@@ -470,30 +467,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const ShaderParameters params{system,    disk_cache, device,
                                   *cpu_addr, host_ptr,   unique_identifier};
 
+    // Read the size up front: `code` is moved from when the shader is built from memory.
+    const std::size_t code_size = code.size() * sizeof(u64);
+    std::unique_ptr<Shader> shader;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
-                                                     std::move(code_b));
+        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
     } else {
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
-        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+        shader = Shader::CreateFromCache(params, found->second);
     }
 
+    Shader* const result = shader.get();
     if (cpu_addr) {
-        Register(shader);
+        Register(std::move(shader), *cpu_addr, code_size);
     } else {
-        null_shader = shader;
+        null_shader = std::move(shader);
     }
 
-    return last_shaders[static_cast<std::size_t>(program)] = shader;
+    return last_shaders[static_cast<std::size_t>(program)] = result;
 }
 
-Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto& memory_manager{system.GPU().MemoryManager()};
     const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 
-    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
-    if (kernel) {
+    if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
         return kernel;
     }
 
@@ -505,20 +503,23 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     const ShaderParameters params{system,    disk_cache, device,
                                   *cpu_addr, host_ptr,   unique_identifier};
 
+    // Read the size up front: `code` is moved from when the kernel is built from memory.
+    const std::size_t code_size = code.size() * sizeof(u64);
+    std::unique_ptr<Shader> kernel;
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
-        kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+        kernel = Shader::CreateKernelFromMemory(params, std::move(code));
     } else {
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
-        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+        kernel = Shader::CreateFromCache(params, found->second);
     }
 
+    Shader* const result = kernel.get();
     if (cpu_addr) {
-        Register(kernel);
+        Register(std::move(kernel), *cpu_addr, code_size);
     } else {
-        null_kernel = kernel;
+        null_kernel = std::move(kernel);
     }
-    return kernel;
+    return result;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..6848f1388 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
 
 #include "common/common_types.h"
 #include "video_core/engines/shader_type.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace Core {
 class System;
@@ -35,12 +35,10 @@ class EmuWindow;
 
 namespace OpenGL {
 
-class CachedShader;
 class Device;
 class RasterizerOpenGL;
 struct UnspecializedShader;
 
-using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ProgramHandle {
@@ -64,62 +62,53 @@ struct ShaderParameters {
     u64 unique_identifier;
 };
 
-class CachedShader final : public RasterizerCacheObject {
+class Shader final {
 public:
-    ~CachedShader();
+    ~Shader();
 
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
-    /// Returns the size in bytes of the shader
-    std::size_t GetSizeInBytes() const override {
-        return size_in_bytes;
-    }
-
     /// Gets the shader entries for the shader
     const ShaderEntries& GetEntries() const {
         return entries;
     }
 
-    static Shader CreateStageFromMemory(const ShaderParameters& params,
-                                        Maxwell::ShaderProgram program_type,
-                                        ProgramCode program_code, ProgramCode program_code_b);
-    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+    static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
+                                                         Maxwell::ShaderProgram program_type,
+                                                         ProgramCode program_code,
+                                                         ProgramCode program_code_b);
+    static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
+                                                          ProgramCode code);
 
-    static Shader CreateFromCache(const ShaderParameters& params,
-                                  const PrecompiledShader& precompiled_shader,
-                                  std::size_t size_in_bytes);
+    static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
+                                                   const PrecompiledShader& precompiled_shader);
 
 private:
-    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
-                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                          ShaderEntries entries, ProgramSharedPtr program);
+    explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+                    ProgramSharedPtr program);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
-    std::size_t size_in_bytes = 0;
     ProgramSharedPtr program;
     GLuint handle = 0;
 };
 
-class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
+class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
 public:
     explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                Core::Frontend::EmuWindow& emu_window, const Device& device);
+    ~ShaderCacheOpenGL() override;
 
     /// Loads disk cache for the current game
     void LoadDiskCache(const std::atomic_bool& stop_loading,
                        const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
-    Shader GetStageProgram(Maxwell::ShaderProgram program);
+    Shader* GetStageProgram(Maxwell::ShaderProgram program);
 
     /// Gets a compute kernel in the passed address
-    Shader GetComputeKernel(GPUVAddr code_addr);
-
-protected:
-    // We do not have to flush this cache as things in it are never modified by us.
-    void FlushObjectInner(const Shader& object) override {}
+    Shader* GetComputeKernel(GPUVAddr code_addr);
 
 private:
     ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +121,10 @@ private:
     ShaderDiskCacheOpenGL disk_cache;
     std::unordered_map<u64, PrecompiledShader> runtime_cache;
 
-    Shader null_shader{};
-    Shader null_kernel{};
+    std::unique_ptr<Shader> null_shader;
+    std::unique_ptr<Shader> null_kernel;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
 
 namespace {
 
+using VideoCommon::Shader::SeparateSamplerKey;
+
 using ShaderCacheVersionHash = std::array<u8, 64>;
 
 struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
     u32 value = 0;
 };
 
-struct BoundSamplerKey {
+struct BoundSamplerEntry {
     u32 offset = 0;
     Tegra::Engines::SamplerDescriptor sampler;
 };
 
-struct BindlessSamplerKey {
+struct SeparateSamplerEntry {
+    u32 cbuf1 = 0;
+    u32 cbuf2 = 0;
+    u32 offset1 = 0;
+    u32 offset2 = 0;
+    Tegra::Engines::SamplerDescriptor sampler;
+};
+
+struct BindlessSamplerEntry {
     u32 cbuf = 0;
     u32 offset = 0;
     Tegra::Engines::SamplerDescriptor sampler;
 };
 
-constexpr u32 NativeVersion = 20;
+constexpr u32 NativeVersion = 21;
 
 ShaderCacheVersionHash GetShaderCacheVersionHash() {
     ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
     u32 texture_handler_size_value;
     u32 num_keys;
     u32 num_bound_samplers;
+    u32 num_separate_samplers;
     u32 num_bindless_samplers;
     if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
         file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
         file.ReadArray(&texture_handler_size_value, 1) != 1 ||
         file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
         file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+        file.ReadArray(&num_separate_samplers, 1) != 1 ||
         file.ReadArray(&num_bindless_samplers, 1) != 1) {
         return false;
     }
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
     }
 
     std::vector<ConstBufferKey> flat_keys(num_keys);
-    std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
-    std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+    std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
+    std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
+    std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
     if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
         file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
             flat_bound_samplers.size() ||
+        file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
+            flat_separate_samplers.size() ||
         file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
             flat_bindless_samplers.size()) {
         return false;
     }
-    for (const auto& key : flat_keys) {
-        keys.insert({{key.cbuf, key.offset}, key.value});
+    for (const auto& entry : flat_keys) {
+        keys.insert({{entry.cbuf, entry.offset}, entry.value});
     }
-    for (const auto& key : flat_bound_samplers) {
-        bound_samplers.emplace(key.offset, key.sampler);
+    for (const auto& entry : flat_bound_samplers) {
+        bound_samplers.emplace(entry.offset, entry.sampler);
     }
-    for (const auto& key : flat_bindless_samplers) {
-        bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+    for (const auto& entry : flat_separate_samplers) {
+        SeparateSamplerKey key;
+        key.buffers = {entry.cbuf1, entry.cbuf2};
+        key.offsets = {entry.offset1, entry.offset2};
+        separate_samplers.emplace(key, entry.sampler);
+    }
+    for (const auto& entry : flat_bindless_samplers) {
+        bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
     }
 
     return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
         file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
         file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
         file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
         file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
         return false;
     }
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
         flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
     }
 
-    std::vector<BoundSamplerKey> flat_bound_samplers;
+    std::vector<BoundSamplerEntry> flat_bound_samplers;
     flat_bound_samplers.reserve(bound_samplers.size());
     for (const auto& [address, sampler] : bound_samplers) {
-        flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+        flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
+    }
+
+    std::vector<SeparateSamplerEntry> flat_separate_samplers;
+    flat_separate_samplers.reserve(separate_samplers.size());
+    for (const auto& [key, sampler] : separate_samplers) {
+        SeparateSamplerEntry entry;
+        std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
+        std::tie(entry.offset1, entry.offset2) = key.offsets;
+        entry.sampler = sampler;
+        flat_separate_samplers.push_back(entry);
     }
 
-    std::vector<BindlessSamplerKey> flat_bindless_samplers;
+    std::vector<BindlessSamplerEntry> flat_bindless_samplers;
     flat_bindless_samplers.reserve(bindless_samplers.size());
     for (const auto& [address, sampler] : bindless_samplers) {
         flat_bindless_samplers.push_back(
-            BindlessSamplerKey{address.first, address.second, sampler});
+            BindlessSamplerEntry{address.first, address.second, sampler});
     }
 
     return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
            file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
                flat_bound_samplers.size() &&
+           file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
+               flat_separate_samplers.size() &&
            file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
                flat_bindless_samplers.size();
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
     VideoCommon::Shader::ComputeInfo compute_info;
     VideoCommon::Shader::KeyMap keys;
     VideoCommon::Shader::BoundSamplerMap bound_samplers;
+    VideoCommon::Shader::SeparateSamplerMap separate_samplers;
     VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 6ec328c53..932a2f69e 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() {
     gl_buffer.Release();
 }
 
-GLuint OGLStreamBuffer::GetHandle() const {
-    return gl_buffer.handle;
-}
-
-GLsizeiptr OGLStreamBuffer::GetSize() const {
-    return buffer_size;
-}
-
 std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
     ASSERT(size <= buffer_size);
     ASSERT(alignment <= buffer_size);
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index f8383cbd4..866da3594 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -17,9 +17,6 @@ public:
                              bool use_persistent = true);
     ~OGLStreamBuffer();
 
-    GLuint GetHandle() const;
-    GLsizeiptr GetSize() const;
-
     /*
      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
      * and the optional alignment requirement.
@@ -32,6 +29,14 @@ public:
 
     void Unmap(GLsizeiptr size);
 
+    GLuint Handle() const {
+        return gl_buffer.handle;
+    }
+
+    GLsizeiptr Size() const {
+        return buffer_size;
+    }
+
 private:
     OGLBuffer gl_buffer;
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5f33d9e40..1fde38328 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,8 +37,8 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
 
 } // Anonymous namespace
 
-CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                                     VAddr cpu_addr, std::size_t size)
+Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
+               std::size_t size)
     : VideoCommon::BufferBlock{cpu_addr, size} {
     VkBufferCreateInfo ci;
     ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
@@ -54,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
     buffer.commit = memory_manager.Commit(buffer.handle, false);
 }
 
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
 
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                              const VKDevice& device, VKMemoryManager& memory_manager,
@@ -67,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
 
 VKBufferCache::~VKBufferCache() = default;
 
-Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
-}
-
-VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
-    return buffer->GetHandle();
+std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
 }
 
 VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
@@ -91,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
     std::memcpy(staging.commit->Map(size), data, size);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
+    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
                       size](vk::CommandBuffer cmdbuf) {
         cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
 
@@ -114,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
                                       u8* data) {
     const auto& staging = staging_pool.GetUnusedBuffer(size, true);
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
+    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
                       size](vk::CommandBuffer cmdbuf) {
         VkBufferMemoryBarrier barrier;
         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -141,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
 void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                               std::size_t dst_offset, std::size_t size) {
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
-                      dst_offset, size](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
+                      size](vk::CommandBuffer cmdbuf) {
         cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
 
         std::array<VkBufferMemoryBarrier, 2> barriers;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..9ebbef835 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
 
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -24,13 +23,13 @@ class VKDevice;
 class VKMemoryManager;
 class VKScheduler;
 
-class CachedBufferBlock final : public VideoCommon::BufferBlock {
+class Buffer final : public VideoCommon::BufferBlock {
 public:
-    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                               VAddr cpu_addr, std::size_t size);
-    ~CachedBufferBlock();
+    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
+                    std::size_t size);
+    ~Buffer();
 
-    VkBuffer GetHandle() const {
+    VkBuffer Handle() const {
         return *buffer.handle;
     }
 
@@ -38,8 +37,6 @@ private:
     VKBuffer buffer;
 };
 
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -50,9 +47,7 @@ public:
     VkBuffer GetEmptyBuffer(std::size_t size) override;
 
 protected:
-    VkBuffer ToHandle(const Buffer& buffer) override;
-
-    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                          const u8* data) override;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b8ccf164f..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
 
 namespace Vulkan {
 
@@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
-                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
-                           u32 main_offset)
-    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+               VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
+    : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
       registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
                                                            compiler_settings, registry},
       entries{GenerateShaderEntries(shader_ir)} {}
 
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
 
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
-    Core::System& system, Tegra::Engines::ShaderType stage) {
-    if (stage == Tegra::Engines::ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
+                                                              Tegra::Engines::ShaderType stage) {
+    if (stage == ShaderType::Compute) {
         return system.GPU().KeplerCompute();
     } else {
         return system.GPU().Maxwell3D();
@@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
                                  VKDescriptorPool& descriptor_pool,
                                  VKUpdateDescriptorQueue& update_descriptor_queue,
                                  VKRenderPassCache& renderpass_cache)
-    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
-      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
-      renderpass_cache{renderpass_cache} {}
+    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
+      scheduler{scheduler}, descriptor_pool{descriptor_pool},
+      update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
 
 VKPipelineCache::~VKPipelineCache() = default;
 
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     const auto& gpu = system.GPU().Maxwell3D();
 
-    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
 
@@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
         const GPUVAddr program_addr{GetShaderAddress(system, program)};
         const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
         ASSERT(cpu_addr);
-        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
-        if (!shader) {
+
+        Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+        if (!result) {
             const auto host_ptr{memory_manager.GetPointer(program_addr)};
 
             // No shader found - create a new one
             constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
-            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+            const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
             ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+            auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
+                                                   stage_offset);
+            result = shader.get();
 
-            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
-                                                    std::move(code), stage_offset);
             if (cpu_addr) {
-                Register(shader);
+                Register(std::move(shader), *cpu_addr, size_in_bytes);
             } else {
-                null_shader = shader;
+                null_shader = std::move(shader);
             }
         }
-        shaders[index] = std::move(shader);
+        shaders[index] = result;
     }
     return last_shaders = shaders;
 }
@@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
     ASSERT(cpu_addr);
 
-    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+    Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
     if (!shader) {
         // No shader found - create a new one
         const auto host_ptr = memory_manager.GetPointer(program_addr);
 
         ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
-        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
-                                                program_addr, *cpu_addr, std::move(code),
-                                                KERNEL_MAIN_OFFSET);
+        const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+        auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
+                                                    std::move(code), KERNEL_MAIN_OFFSET);
+        shader = shader_info.get();
+
         if (cpu_addr) {
-            Register(shader);
+            Register(std::move(shader_info), *cpu_addr, size_in_bytes);
         } else {
-            null_kernel = shader;
+            null_kernel = std::move(shader_info);
         }
     }
 
@@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     return *entry;
 }
 
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
     bool finished = false;
     const auto Finish = [&] {
         // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
         Finish();
         it = compute_cache.erase(it);
     }
-
-    RasterizerCache::Unregister(shader);
 }
 
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
         }
 
         const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
-        ASSERT(shader);
+        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
 
         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
-        const auto program_type = GetShaderType(program_enum);
+        const ShaderType program_type = GetShaderType(program_enum);
         const auto& entries = shader->GetEntries();
         program[stage] = {
             Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
 
 namespace Core {
 class System;
@@ -41,8 +41,6 @@ class VKFence;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
 public:
-    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
-                          u32 main_offset);
-    ~CachedShader();
+    explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                    VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+    ~Shader();
 
     GPUVAddr GetGpuAddr() const {
         return gpu_addr;
     }
 
-    std::size_t GetSizeInBytes() const override {
-        return program_code.size() * sizeof(u64);
-    }
-
     VideoCommon::Shader::ShaderIR& GetIR() {
         return shader_ir;
     }
@@ -144,25 +137,23 @@ private:
     ShaderEntries entries;
 };
 
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
 public:
     explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                              const VKDevice& device, VKScheduler& scheduler,
                              VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue,
                              VKRenderPassCache& renderpass_cache);
-    ~VKPipelineCache();
+    ~VKPipelineCache() override;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
     VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
 protected:
-    void Unregister(const Shader& shader) override;
-
-    void FlushObjectInner(const Shader& object) override {}
+    void OnShaderRemoval(Shader* shader) final;
 
 private:
     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
     VKUpdateDescriptorQueue& update_descriptor_queue;
     VKRenderPassCache& renderpass_cache;
 
-    Shader null_shader{};
-    Shader null_kernel{};
+    std::unique_ptr<Shader> null_shader;
+    std::unique_ptr<Shader> null_kernel;
 
-    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
 
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 19b8f9da3..184b2238a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
 
 namespace Vulkan {
 
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
 }
 
 std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
-    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
     std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
     for (std::size_t i = 0; i < std::size(addresses); ++i) {
         addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
                                                std::size_t stage, std::size_t index = 0) {
     const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+        if (entry.is_separated) {
+            const u32 buffer_1 = entry.buffer;
+            const u32 buffer_2 = entry.secondary_buffer;
+            const u32 offset_1 = entry.offset;
+            const u32 offset_2 = entry.secondary_offset;
+            const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
+            const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
+            return engine.GetTextureInfo(handle_1 | handle_2);
+        }
+    }
     if (entry.is_bindless) {
         const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
         return engine.GetTextureInfo(tex_handle);
@@ -776,12 +788,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
 }
 
 void RasterizerVulkan::SetupShaderDescriptors(
-    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
     texture_cache.GuardSamplers(true);
 
     for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
         // Skip VertexA stage
-        const auto& shader = shaders[stage + 1];
+        Shader* const shader = shaders[stage + 1];
         if (!shader) {
             continue;
         }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 04be37a5e..c8c187606 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -168,7 +168,7 @@ private:
                                  bool is_indexed, bool is_instanced);
 
     /// Setup descriptors in the graphics pipeline.
-    void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
 
     void SetupImageTransitions(Texceptions texceptions,
                                const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..c765c60a0 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,7 +35,7 @@ public:
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
     void Unmap(u64 size);
 
-    VkBuffer GetHandle() const {
+    VkBuffer Handle() const {
         return *buffer;
     }
 
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
-                                               std::optional<u32> buffer) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
     if (info.IsComplete()) {
         return info;
     }
-    const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
-                                : registry.ObtainBoundSampler(offset);
     if (!sampler) {
         LOG_WARNING(HW_GPU, "Unknown sampler info");
         info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
 
 std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                             SamplerInfo sampler_info) {
-    const auto offset = static_cast<u32>(sampler.index.Value());
-    const auto info = GetSamplerInfo(sampler_info, offset);
+    const u32 offset = static_cast<u32>(sampler.index.Value());
+    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
 
     // If this sampler has already been used, return the existing mapping.
     const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_node != nullptr);
-    if (base_node == nullptr) {
+    if (!base_node) {
+        UNREACHABLE();
         return std::nullopt;
     }
 
-    if (const auto bindless_sampler_info =
-            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
-        const u32 buffer = bindless_sampler_info->GetIndex();
-        const u32 offset = bindless_sampler_info->GetOffset();
-        info = GetSamplerInfo(info, offset, buffer);
+    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = sampler_info->index;
+        const u32 offset = sampler_info->offset;
+        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                     [buffer = buffer, offset = offset](const Sampler& entry) {
+                                     [buffer, offset](const Sampler& entry) {
                                          return entry.buffer == buffer && entry.offset == offset;
                                      });
         if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                           *info.is_shadow, *info.is_buffer, false);
     }
-    if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
-        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
-        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
-        info = GetSamplerInfo(info, base_offset);
+    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+        const std::pair indices = sampler_info->indices;
+        const std::pair offsets = sampler_info->offsets;
+        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+        // Try to use an already created sampler if it exists
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                       indices == std::pair{entry.buffer, entry.secondary_buffer};
+            });
+        if (it != used_samplers.end()) {
+            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+            return *it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const u32 next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer);
+    }
+    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = sampler_info->base_offset / 4;
+        index_var = GetCustomVariable(sampler_info->bindless_var);
+        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
 
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
 
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 
 struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
           is_buffer{is_buffer}, is_indexed{is_indexed} {}
 
+    /// Separate sampler constructor: stores a primary and a secondary (buffer, offset) pair
+    constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+                               bool is_buffer)
+        : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
     /// Bindless samplers constructor
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
           is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
 
-    u32 index = 0;  ///< Emulated index given for the this sampler.
-    u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
-    u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
-    u32 size = 1;   ///< Size of the sampler.
+    u32 index = 0;            ///< Emulated index given for this sampler.
+    u32 offset = 0;           ///< Offset in the const buffer from where the sampler is being read.
+    u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+    u32 buffer = 0;           ///< Buffer where the bindless sampler is read.
+    u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+    u32 size = 1;             ///< Size of the sampler.
 
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
-    bool is_array = false;    ///< Whether the texture is being sampled as an array texture or not.
-    bool is_shadow = false;   ///< Whether the texture is being sampled as a depth texture or not.
-    bool is_buffer = false;   ///< Whether the texture is a texture buffer without sampler.
-    bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
-    bool is_indexed = false;  ///< Whether this sampler is an indexed array of textures.
+    bool is_array = false;     ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow = false;    ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_buffer = false;    ///< Whether the texture is a texture buffer without sampler.
+    bool is_bindless = false;  ///< Whether this sampler belongs to a bindless texture or not.
+    bool is_indexed = false;   ///< Whether this sampler is an indexed array of textures.
+    bool is_separated = false; ///< Whether the image and sampler are separated or not.
 };
 
 /// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
-    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
-        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetBaseOffset() const {
-        return base_offset;
-    }
-
-    constexpr u32 GetIndexVar() const {
-        return bindless_var;
-    }
-
-private:
+struct ArraySamplerNode {
     u32 index;
     u32 base_offset;
     u32 bindless_var;
 };
 
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
-    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetOffset() const {
-        return offset;
-    }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+    std::pair<u32, u32> indices; ///< Const buffer index of each of the two tracked operands
+    std::pair<u32, u32> offsets; ///< Const buffer offset of each of the two tracked operands
+};
 
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
     u32 index;
     u32 offset;
 };
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
 template <typename T, typename... Args>
 TrackSampler MakeTrackSampler(Args&&... args) {
     static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
 }
 
 template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
     return value;
 }
 
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+    std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+    SeparateSamplerKey key;
+    key.buffers = buffers;
+    key.offsets = offsets;
+    const auto iter = separate_samplers.find(key);
+    if (iter != separate_samplers.end()) { // Descriptor already cached for this key
+        return iter->second;
+    }
+    if (!engine) { // Nothing to query without an attached engine
+        return std::nullopt;
+    }
+
+    const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+    const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+    const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+    separate_samplers.emplace(key, value); // Remember the result for later queries
+    return value;
+}
+
 std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
                                                                                  u32 offset) {
     const std::pair key = {buffer, offset};
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
 
 namespace VideoCommon::Shader {
 
+struct SeparateSamplerKey { // Lookup key of Registry's separate sampler map
+    std::pair<u32, u32> buffers; ///< Const buffer indices the two handles are read from
+    std::pair<u32, u32> offsets; ///< Const buffer offsets the two handles are read from
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> { // XOR fold: swapped fields hash alike
+    std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+        return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+                                key.offsets.second);
+    }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { // Equal when both pairs are equal
+    bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+                    const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+        return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
 using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
 using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+    std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
 using BindlessSamplerMap =
     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
 
@@ -73,6 +104,9 @@ public:
 
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
 
+    std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+        std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
 
     /// Inserts a key.
@@ -128,6 +162,7 @@ private:
     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
     KeyMap keys;
     BoundSamplerMap bound_samplers;
+    SeparateSamplerMap separate_samplers;
     BindlessSamplerMap bindless_samplers;
     u32 bound_buffer;
     GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
 
     /// Queries the missing sampler info from the execution context.
-    SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
-                               std::optional<u32> buffer = std::nullopt);
+    SamplerInfo GetSamplerInfo(SamplerInfo info,
+                               std::optional<Tegra::Engines::SamplerDescriptor> sampler);
 
     /// Accesses a texture sampler.
     std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
-                                                        s64 cursor);
+    std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                       s64 cursor);
+
+    std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+                                                             const OperationNode& operation,
+                                                             Node gpr, Node base_offset,
+                                                             Node tracked, const NodeBlock& code,
+                                                             s64 cursor);
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
 namespace VideoCommon::Shader {
 
 namespace {
+
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                    OperationCode operation_code) {
     for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
     if (const auto operation = std::get_if<OperationNode>(&*node)) {
         operation->SetAmendIndex(amend_index);
         return true;
-    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+    }
+    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
         conditional->SetAmendIndex(amend_index);
         return true;
     }
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
 
 } // Anonymous namespace
 
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
-                                                              s64 cursor) {
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                             s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+        const u32 cbuf_index = cbuf->GetIndex();
+
         // Constant buffer found, test if it's an immediate
         const auto& offset = cbuf->GetOffset();
         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
-            auto track =
-                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
             return {tracked, track};
         }
         if (const auto operation = std::get_if<OperationNode>(&*offset)) {
             const u32 bound_buffer = registry.GetBoundBuffer();
-            if (bound_buffer != cbuf->GetIndex()) {
+            if (bound_buffer != cbuf_index) {
                 return {};
             }
-            const auto pair = DecoupleIndirectRead(*operation);
-            if (!pair) {
-                return {};
+            if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+                auto [gpr, base_offset] = *pair;
+                return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+                                                  code, cursor);
             }
-            auto [gpr, base_offset] = *pair;
-            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
-            const auto& gpu_driver = registry.AccessGuestDriverProfile();
-            const u32 bindless_cv = NewCustomVariable();
-            Node op =
-                Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
-            const Node cv_node = GetCustomVariable(bindless_cv);
-            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
-            const std::size_t amend_index = DeclareAmend(std::move(amend_op));
-            AmendNodeCv(amend_index, code[cursor]);
-            // TODO Implement Bindless Index custom variable
-            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
-                                                            offset_inm->GetValue(), bindless_cv);
-            return {tracked, track};
         }
         return {};
     }
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
         return TrackBindlessSampler(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
-        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
-            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
-                std::get<0>(found)) {
-                // Cbuf found in operand.
+        const OperationNode& op = *operation;
+
+        const OperationCode opcode = operation->GetCode();
+        if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+            ASSERT(op.GetOperandsCount() == 2);
+            auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+            auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+            if (node_a && node_b) {
+                auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+                                                                   std::pair{offset_a, offset_b});
+                return {tracked, std::move(track)};
+            }
+        }
+        std::size_t i = op.GetOperandsCount();
+        while (i--) { // `i` is decremented by the test, so op[i] walks operands last to first
+            if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
+                // Constant buffer found in operand.
                 return found;
             }
         }
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
     return {};
 }
 
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+    const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+    const NodeBlock& code, s64 cursor) {
+    const auto offset_imm = std::get<ImmediateNode>(*base_offset); // Throws if not immediate
+    const auto& gpu_driver = registry.AccessGuestDriverProfile();
+    const u32 bindless_cv = NewCustomVariable();
+    const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+    Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+    Node cv_node = GetCustomVariable(bindless_cv);
+    Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+    const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+    AmendNodeCv(amend_index, code[cursor]); // Tag the node at the cursor with the amend index
+
+    // TODO: Implement bindless index custom variable
+    auto track =
+        MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+    return {tracked, track};
+}
+
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                                s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..a23c23886
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+    static constexpr u64 PAGE_SHIFT = 14;
+
+    struct Entry {
+        VAddr addr_start; ///< First address covered by the entry
+        VAddr addr_end;   ///< End address, treated as non-inclusive by Overlaps
+        T* data;          ///< Shader this entry refers to
+
+        bool is_memory_marked = true; ///< Cleared by UnmarkMemory
+
+        constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+            return start < addr_end && addr_start < end;
+        }
+    };
+
+public:
+    virtual ~ShaderCache() = default;
+
+    /// @brief Removes shaders inside a given region
+    /// @note Checks for ranges: every entry overlapping [addr, addr + size) is invalidated
+    /// @param addr Start address of the invalidation
+    /// @param size Number of bytes of the invalidation
+    void InvalidateRegion(VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+        RemovePendingShaders(); // Unlike OnCPUWrite, pending removals are flushed right away
+    }
+
+    /// @brief Unmarks a memory region as cached and queues its shaders for a later removal
+    /// @param addr Start address of the CPU write operation
+    /// @param size Number of bytes of the CPU write operation
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+    }
+
+    /// @brief Flushes delayed removal operations
+    void SyncGuestHost() {
+        std::scoped_lock lock{invalidation_mutex};
+        RemovePendingShaders();
+    }
+
+    /// @brief Tries to obtain a cached shader starting in a given address
+    /// @note Doesn't check for ranges, the given address has to be the start of the shader
+    /// @param addr Start address of the shader; the lookup is by exact address, not by range
+    /// @return Pointer to a valid shader, nullptr when nothing is found
+    T* TryGet(VAddr addr) const {
+        std::scoped_lock lock{lookup_mutex};
+
+        const auto it = lookup_cache.find(addr);
+        if (it == lookup_cache.end()) {
+            return nullptr;
+        }
+        return it->second->data;
+    }
+
+protected:
+    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+    /// @brief Register in the cache a given entry
+    /// @param data Shader to store in the cache
+    /// @param addr Start address of the shader that will be registered
+    /// @param size Size in bytes of the shader
+    void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+        const VAddr addr_end = addr + size;
+        Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+        const u64 page_end = addr_end >> PAGE_SHIFT; // Inclusive upper bound of the page loop
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            invalidation_cache[page].push_back(entry); // One reference per page touched
+        }
+
+        storage.push_back(std::move(data)); // The cache takes ownership of the shader
+
+        rasterizer.UpdatePagesCachedCount(addr, size, 1);
+    }
+
+    /// @brief Called when a shader is going to be removed
+    /// @param shader Shader that will be removed
+    /// @pre invalidation_mutex is locked
+    /// @pre lookup_mutex is locked
+    virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+    /// @brief Invalidate pages in a given region
+    /// @pre invalidation_mutex is locked
+    void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+        const VAddr addr_end = addr + size;
+        const u64 page_end = addr_end >> PAGE_SHIFT; // Inclusive upper bound of the page loop
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            const auto it = invalidation_cache.find(page);
+            if (it == invalidation_cache.end()) { // No entries were registered in this page
+                continue;
+            }
+
+            std::vector<Entry*>& entries = it->second;
+            InvalidatePageEntries(entries, addr, addr_end);
+
+            // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
+            if (entries.empty()) {
+                invalidation_cache.erase(it);
+            }
+        }
+    }
+
+    /// @brief Remove shaders marked for deletion from the lookup cache and the storage
+    /// @pre invalidation_mutex is locked
+    void RemovePendingShaders() {
+        if (marked_for_removal.empty()) {
+            return;
+        }
+        std::scoped_lock lock{lookup_mutex};
+
+        // An entry overlapping several visited pages is queued once per page. Drop the repeats
+        // so no pointer is dereferenced after lookup_cache has destroyed the Entry it refers to.
+        std::sort(marked_for_removal.begin(), marked_for_removal.end());
+        marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
+                                 marked_for_removal.end());
+        std::vector<T*> removed_shaders;
+        removed_shaders.reserve(marked_for_removal.size());
+        for (Entry* const entry : marked_for_removal) {
+            removed_shaders.push_back(entry->data); // Read before the erase frees the Entry
+            lookup_cache.erase(entry->addr_start);
+        }
+        marked_for_removal.clear();
+        RemoveShadersFromStorage(std::move(removed_shaders));
+    }
+
+    /// @brief Invalidates entries in a given range for the passed page
+    /// @param entries  Entries filed under the page, overlapping ones are erased from it
+    /// @param addr     Start address of the invalidation
+    /// @param addr_end Non-inclusive end address of the invalidation
+    /// @pre invalidation_mutex is locked
+    void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+        // NOTE(review): only this page's vector is pruned; an entry also filed under a page that
+        // is not visited stays listed there - confirm it cannot outlive its Entry.
+        for (auto cursor = entries.begin(); cursor != entries.end();) {
+            Entry* const candidate = *cursor;
+            if (candidate->Overlaps(addr, addr_end)) {
+                UnmarkMemory(candidate);
+                marked_for_removal.push_back(candidate);
+                cursor = entries.erase(cursor);
+            } else {
+                ++cursor;
+            }
+        }
+    }
+
+    /// @brief Stops tracking an entry's memory range in the rasterizer, at most once per entry
+    /// @param entry Entry whose range will be unmarked, it is a no-op if it is already unmarked
+    void UnmarkMemory(Entry* entry) {
+        if (entry->is_memory_marked) {
+            // Flip the flag so repeated calls for the same entry issue the -1 update only once.
+            entry->is_memory_marked = false;
+
+            const VAddr start = entry->addr_start;
+            const std::size_t length = entry->addr_end - start;
+            rasterizer.UpdatePagesCachedCount(start, length, -1);
+        }
+    }
+
+    /// @brief Notifies the removal of a list of shaders and releases them from the storage
+    /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
+    /// @pre invalidation_mutex is locked
+    /// @pre lookup_mutex is locked
+    void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+        // Sort and deduplicate: each shader is notified once and the list can be bisected below
+        std::sort(removed_shaders.begin(), removed_shaders.end());
+        removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
+                              removed_shaders.end());
+
+        // Now that there are no duplicates, we can notify removals
+        for (T* const shader : removed_shaders) {
+            OnShaderRemoval(shader);
+        }
+
+        // Release the owning pointers; the list is sorted, so a binary search is enough
+        const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
+            return std::binary_search(removed_shaders.begin(), removed_shaders.end(),
+                                      shader.get());
+        };
+        storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+    }
+
+    /// @brief Creates a new entry, hands it to the lookup cache and returns a non-owning pointer
+    /// @note NOTE(review): emplace keeps an existing key; presumably addr is never reused - confirm
+    /// @pre lookup_mutex is locked
+    Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+        std::unique_ptr<Entry> owner = std::make_unique<Entry>(Entry{addr, addr_end, data});
+        Entry* const observer = owner.get();
+        lookup_cache.emplace(addr, std::move(owner));
+        return observer;
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    mutable std::mutex lookup_mutex;
+    std::mutex invalidation_mutex;
+
+    std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+    std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+    std::vector<std::unique_ptr<T>> storage;
+    std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7e9073cc3..32c81dc70 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -631,13 +631,11 @@ void Config::ReadRendererValues() {
         static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
     Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
     Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
-    Settings::values.resolution_factor =
-        ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
     Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
     Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
     Settings::values.use_frame_limit =
         ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
-    Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
+    Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
     Settings::values.use_disk_shader_cache =
         ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
     const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -722,8 +720,6 @@ void Config::ReadUIValues() {
             .toString();
     UISettings::values.enable_discord_presence =
         ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
-    UISettings::values.screenshot_resolution_factor =
-        static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
     UISettings::values.select_user_on_boot =
         ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
 
@@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() {
     WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
     WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
     WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
-    WriteSetting(QStringLiteral("resolution_factor"),
-                 static_cast<double>(Settings::values.resolution_factor), 1.0);
     WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
     WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
     WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1159,8 +1153,6 @@ void Config::SaveUIValues() {
                  QString::fromUtf8(UISettings::themes[0].second));
     WriteSetting(QStringLiteral("enable_discord_presence"),
                  UISettings::values.enable_discord_presence, true);
-    WriteSetting(QStringLiteral("screenshot_resolution_factor"),
-                 UISettings::values.screenshot_resolution_factor, 0);
     WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
                  false);
 
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #endif
 
-namespace {
-enum class Resolution : int {
-    Auto,
-    Scale1x,
-    Scale2x,
-    Scale3x,
-    Scale4x,
-};
-
-float ToResolutionFactor(Resolution option) {
-    switch (option) {
-    case Resolution::Auto:
-        return 0.f;
-    case Resolution::Scale1x:
-        return 1.f;
-    case Resolution::Scale2x:
-        return 2.f;
-    case Resolution::Scale3x:
-        return 3.f;
-    case Resolution::Scale4x:
-        return 4.f;
-    }
-    return 0.f;
-}
-
-Resolution FromResolutionFactor(float factor) {
-    if (factor == 0.f) {
-        return Resolution::Auto;
-    } else if (factor == 1.f) {
-        return Resolution::Scale1x;
-    } else if (factor == 2.f) {
-        return Resolution::Scale2x;
-    } else if (factor == 3.f) {
-        return Resolution::Scale3x;
-    } else if (factor == 4.f) {
-        return Resolution::Scale4x;
-    }
-    return Resolution::Auto;
-}
-} // Anonymous namespace
-
 ConfigureGraphics::ConfigureGraphics(QWidget* parent)
     : QWidget(parent), ui(new Ui::ConfigureGraphics) {
     vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
 
     ui->api->setEnabled(runtime_lock);
     ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
-    ui->resolution_factor_combobox->setCurrentIndex(
-        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
     ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
     ui->use_disk_shader_cache->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
 void ConfigureGraphics::ApplyConfiguration() {
     Settings::values.renderer_backend = GetCurrentGraphicsBackend();
     Settings::values.vulkan_device = vulkan_device;
-    Settings::values.resolution_factor =
-        ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
     Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
     Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
     Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
          </widget>
         </item>
         <item>
-         <layout class="QHBoxLayout" name="horizontalLayout_2">
-          <item>
-           <widget class="QLabel" name="label">
-            <property name="text">
-             <string>Internal Resolution:</string>
-            </property>
-           </widget>
-          </item>
-          <item>
-           <widget class="QComboBox" name="resolution_factor_combobox">
-            <item>
-             <property name="text">
-              <string>Auto (Window Size)</string>
-             </property>
-            </item>
-            <item>
-             <property name="text">
-              <string>Native (1280x720)</string>
-             </property>
-            </item>
-            <item>
-             <property name="text">
-              <string>2x Native (2560x1440)</string>
-             </property>
-            </item>
-            <item>
-             <property name="text">
-              <string>3x Native (3840x2160)</string>
-             </property>
-            </item>
-            <item>
-             <property name="text">
-              <string>4x Native (5120x2880)</string>
-             </property>
-            </item>
-           </widget>
-          </item>
-         </layout>
-        </item>
-        <item>
          <layout class="QHBoxLayout" name="horizontalLayout_6">
           <item>
            <widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 270cccc77..4119d7907 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() {
                 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
                 UpdateStatusBar();
             });
-    // TODO: Remove this comment/static whenever the next major release of
-    // MSVC occurs and we make it a requirement (see:
-    // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
-    static constexpr u16 SPEED_LIMIT_STEP = 5;
+    constexpr u16 SPEED_LIMIT_STEP = 5;
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
             &QShortcut::activated, this, [&] {
                 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7240270f5..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -380,8 +380,6 @@ void Config::ReadValues() {
     Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
     Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
 
-    Settings::values.resolution_factor =
-        static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
     Settings::values.aspect_ratio =
         static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
     Settings::values.max_anisotropy =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6f53e9659..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_shader_jit =
 
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
 # Aspect ratio
 # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
 aspect_ratio =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
     Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
-    Settings::values.resolution_factor =
-        static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
     Settings::values.aspect_ratio =
         static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
     Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_shader_jit =
 
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
 # Aspect ratio
 # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
 aspect_ratio =