Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h            |  2
-rw-r--r--  src/video_core/gpu_thread.cpp                         |  6
-rw-r--r--  src/video_core/gpu_thread.h                           |  4
-rw-r--r--  src/video_core/macro/macro.cpp                        |  6
-rw-r--r--  src/video_core/memory_manager.cpp                     | 36
-rw-r--r--  src/video_core/memory_manager.h                       | 12
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp      |  7
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp       | 85
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h         |  6
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp  | 62
-rw-r--r--  src/video_core/texture_cache/texture_cache.h          | 95
-rw-r--r--  src/video_core/textures/texture.cpp                   | 14
-rw-r--r--  src/video_core/textures/texture.h                     | 70
-rw-r--r--  src/video_core/video_core.cpp                         |  2
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp        |  6
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp       |  2
16 files changed, 229 insertions, 186 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 1f656ffa8..abdc593df 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1442,7 +1442,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     }
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         address_size =
-            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+            static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
     }
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     vertex_buffers[index] = Binding{
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 050b11874..3c5317777 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -31,8 +31,10 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
     auto current_context = context.Acquire();
     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
 
+    CommandDataContainer next;
+
     while (!stop_token.stop_requested()) {
-        CommandDataContainer next = state.queue.PopWait(stop_token);
+        state.queue.PopWait(next, stop_token);
         if (stop_token.stop_requested()) {
             break;
         }
@@ -116,7 +118,7 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
 
     std::unique_lock lk(state.write_lock);
     const u64 fence{++state.last_fence};
-    state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
+    state.queue.EmplaceWait(std::move(command_data), fence, block);
 
     if (block) {
         Common::CondvarWait(state.cv, lk, thread.get_stop_token(), [this, fence] {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 90bcb5958..43940bd6d 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,8 @@
 #include <thread>
 #include <variant>
 
+#include "common/bounded_threadsafe_queue.h"
 #include "common/polyfill_thread.h"
-#include "common/threadsafe_queue.h"
 #include "video_core/framebuffer_config.h"
 
 namespace Tegra {
@@ -97,7 +97,7 @@ struct CommandDataContainer {
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    using CommandQueue = Common::MPSCQueue<CommandDataContainer, true>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
     std::mutex write_lock;
     CommandQueue queue;
     u64 last_fence{};
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 82ad0477d..905505ca1 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -6,7 +6,7 @@
 #include <optional>
 #include <span>
 
-#include <boost/container_hash/hash.hpp>
+#include "common/container_hash.h"
 
 #include <fstream>
 #include "common/assert.h"
@@ -89,7 +89,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
 
         if (!mid_method.has_value()) {
             cache_info.lle_program = Compile(macro_code->second);
-            cache_info.hash = boost::hash_value(macro_code->second);
+            cache_info.hash = Common::HashValue(macro_code->second);
             if (Settings::values.dump_macros) {
                 Dump(cache_info.hash, macro_code->second);
             }
@@ -100,7 +100,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
             code.resize(macro_cached.size() - rebased_method);
             std::memcpy(code.data(), macro_cached.data() + rebased_method,
                         code.size() * sizeof(u32));
-            cache_info.hash = boost::hash_value(code);
+            cache_info.hash = Common::HashValue(code);
             cache_info.lle_program = Compile(code);
             if (Settings::values.dump_macros) {
                 Dump(cache_info.hash, code);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 83924475b..01fb5b546 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -22,7 +22,7 @@ std::atomic<size_t> MemoryManager::unique_identifier_generator{};
 
 MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
                              u64 page_bits_)
-    : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+    : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()},
      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
                                           page_bits != big_page_bits ? page_bits : 0},
@@ -43,7 +43,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
 
     big_entries.resize(big_page_table_size / 32, 0);
     big_page_table_cpu.resize(big_page_table_size);
-    big_page_continous.resize(big_page_table_size / continous_bits, 0);
+    big_page_continuous.resize(big_page_table_size / continuous_bits, 0);
     entries.resize(page_table_size / 32, 0);
 }
@@ -85,17 +85,17 @@ PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
     return kind_map.GetValueAt(gpu_addr);
 }
 
-inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
-    const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
-    const size_t sub_index = big_page_index % continous_bits;
+inline bool MemoryManager::IsBigPageContinuous(size_t big_page_index) const {
+    const u64 entry_mask = big_page_continuous[big_page_index / continuous_bits];
+    const size_t sub_index = big_page_index % continuous_bits;
     return ((entry_mask >> sub_index) & 0x1ULL) != 0;
 }
 
-inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value) {
-    const u64 continous_mask = big_page_continous[big_page_index / continous_bits];
-    const size_t sub_index = big_page_index % continous_bits;
-    big_page_continous[big_page_index / continous_bits] =
-        (~(1ULL << sub_index) & continous_mask) | (value ? 1ULL << sub_index : 0);
+inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool value) {
+    const u64 continuous_mask = big_page_continuous[big_page_index / continuous_bits];
+    const size_t sub_index = big_page_index % continuous_bits;
+    big_page_continuous[big_page_index / continuous_bits] =
+        (~(1ULL << sub_index) & continuous_mask) | (value ? 1ULL << sub_index : 0);
 }
 
 template <MemoryManager::EntryType entry_type>
@@ -140,7 +140,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
             const auto index = PageEntryIndex<true>(current_gpu_addr);
             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
             big_page_table_cpu[index] = sub_value;
-            const bool is_continous = ([&] {
+            const bool is_continuous = ([&] {
                 uintptr_t base_ptr{
                     reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))};
                 if (base_ptr == 0) {
@@ -156,7 +156,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
                 }
                 return true;
             })();
-            SetBigPageContinous(index, is_continous);
+            SetBigPageContinuous(index, is_continuous);
         }
         remaining_size -= big_page_size;
     }
@@ -378,7 +378,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
             if constexpr (is_safe) {
                 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
             }
-            if (!IsBigPageContinous(page_index)) [[unlikely]] {
+            if (!IsBigPageContinuous(page_index)) [[unlikely]] {
                memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
            } else {
                u8* physical = memory.GetPointer(cpu_addr_base);
@@ -427,7 +427,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
            if constexpr (is_safe) {
                rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
            }
-            if (!IsBigPageContinous(page_index)) [[unlikely]] {
+            if (!IsBigPageContinuous(page_index)) [[unlikely]] {
                memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
            } else {
                u8* physical = memory.GetPointer(cpu_addr_base);
@@ -512,7 +512,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
     return result;
 }
 
-size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
+size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
     std::optional<VAddr> old_page_addr{};
     size_t range_so_far = 0;
     bool result{false};
@@ -553,7 +553,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
 }
 
 size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
-    return kind_map.GetContinousSizeFrom(gpu_addr);
+    return kind_map.GetContinuousSizeFrom(gpu_addr);
 }
 
 void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
@@ -594,7 +594,7 @@ void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std
 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     if (GetEntry<true>(gpu_addr) == EntryType::Mapped) [[likely]] {
         size_t page_index = gpu_addr >> big_page_bits;
-        if (IsBigPageContinous(page_index)) [[likely]] {
+        if (IsBigPageContinuous(page_index)) [[likely]] {
             const std::size_t page{(page_index & big_page_mask) + size};
             return page <= big_page_size;
         }
@@ -608,7 +608,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::YUZU_PAGESIZE;
 }
 
-bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const {
     std::optional<VAddr> old_page_addr{};
     bool result{true};
     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 51ae2de68..fbbe856c4 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -94,7 +94,7 @@ public:
     /**
      * Checks if a gpu region is mapped by a single range of cpu addresses.
      */
-    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+    [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const;
 
     /**
      * Checks if a gpu region is mapped entirely.
@@ -123,7 +123,7 @@ public:
     bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
                        VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
 
-    size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
+    size_t MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const;
 
     bool IsWithinGPUAddressRange(GPUVAddr gpu_addr) const {
         return gpu_addr < address_space_size;
@@ -158,8 +158,8 @@ private:
         }
     }
 
-    inline bool IsBigPageContinous(size_t big_page_index) const;
-    inline void SetBigPageContinous(size_t big_page_index, bool value);
+    inline bool IsBigPageContinuous(size_t big_page_index) const;
+    inline void SetBigPageContinuous(size_t big_page_index, bool value);
 
    template <bool is_gpu_address>
    void GetSubmappedRangeImpl(
@@ -213,10 +213,10 @@ private:
     Common::RangeMap<GPUVAddr, PTEKind> kind_map;
     Common::VirtualBuffer<u32> big_page_table_cpu;
-    std::vector<u64> big_page_continous;
+    std::vector<u64> big_page_continuous;
     std::vector<std::pair<VAddr, std::size_t>> page_stash{};
 
-    static constexpr size_t continous_bits = 64;
+    static constexpr size_t continuous_bits = 64;
 
     const size_t unique_identifier;
     std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index ca52e2389..5dce51be8 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -166,7 +166,7 @@ struct FormatTuple {
     {VK_FORMAT_R16G16_UINT, Attachable | Storage},     // R16G16_UINT
     {VK_FORMAT_R16G16_SINT, Attachable | Storage},     // R16G16_SINT
     {VK_FORMAT_R16G16_SNORM, Attachable | Storage},    // R16G16_SNORM
-    {VK_FORMAT_UNDEFINED},                             // R32G32B32_FLOAT
+    {VK_FORMAT_R32G32B32_SFLOAT},                      // R32G32B32_FLOAT
     {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable},      // A8B8G8R8_SRGB
     {VK_FORMAT_R8G8_UNORM, Attachable | Storage},      // R8G8_UNORM
     {VK_FORMAT_R8G8_SNORM, Attachable | Storage},      // R8G8_SNORM
@@ -234,11 +234,6 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
                          PixelFormat pixel_format) {
     ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
     FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
-    if (tuple.format == VK_FORMAT_UNDEFINED) {
-        UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format);
-        return FormatInfo{VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
-    }
-
     // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
     if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
         const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e03685af1..b264e6ada 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -47,14 +47,15 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
 Scheduler::~Scheduler() = default;
 
 void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    // When flushing, we only send data to the worker thread; no waiting is necessary.
     SubmitExecution(signal_semaphore, wait_semaphore);
     AllocateNewContext();
 }
 
 void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    // When finishing, we need to wait for the submission to have executed on the device.
     const u64 presubmit_tick = CurrentTick();
     SubmitExecution(signal_semaphore, wait_semaphore);
-    WaitWorker();
     Wait(presubmit_tick);
     AllocateNewContext();
 }
@@ -63,8 +64,14 @@ void Scheduler::WaitWorker() {
     MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
     DispatchWork();
 
-    std::unique_lock lock{work_mutex};
-    wait_cv.wait(lock, [this] { return work_queue.empty(); });
+    // Ensure the queue is drained.
+    {
+        std::unique_lock ql{queue_mutex};
+        event_cv.wait(ql, [this] { return work_queue.empty(); });
+    }
+
+    // Now wait for execution to finish.
+    std::scoped_lock el{execution_mutex};
 }
 
 void Scheduler::DispatchWork() {
@@ -72,10 +79,10 @@ void Scheduler::DispatchWork() {
         return;
     }
     {
-        std::scoped_lock lock{work_mutex};
+        std::scoped_lock ql{queue_mutex};
         work_queue.push(std::move(chunk));
     }
-    work_cv.notify_one();
+    event_cv.notify_all();
     AcquireNewChunk();
 }
@@ -137,30 +144,55 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) {
 
 void Scheduler::WorkerThread(std::stop_token stop_token) {
     Common::SetCurrentThreadName("VulkanWorker");
-    do {
+
+    const auto TryPopQueue{[this](auto& work) -> bool {
+        if (work_queue.empty()) {
+            return false;
+        }
+
+        work = std::move(work_queue.front());
+        work_queue.pop();
+        event_cv.notify_all();
+        return true;
+    }};
+
+    while (!stop_token.stop_requested()) {
         std::unique_ptr<CommandChunk> work;
-        bool has_submit{false};
+
         {
-            std::unique_lock lock{work_mutex};
-            if (work_queue.empty()) {
-                wait_cv.notify_all();
-            }
-            Common::CondvarWait(work_cv, lock, stop_token, [&] { return !work_queue.empty(); });
+            std::unique_lock lk{queue_mutex};
+
+            // Wait for work.
+            Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
+
+            // If we've been asked to stop, we're done.
             if (stop_token.stop_requested()) {
-                continue;
+                return;
             }
-            work = std::move(work_queue.front());
-            work_queue.pop();
-            has_submit = work->HasSubmit();
+
+            // Exchange lock ownership so that we take the execution lock before
+            // the queue lock goes out of scope. This allows us to force execution
+            // to complete in the next step.
+            std::exchange(lk, std::unique_lock{execution_mutex});
+
+            // Perform the work, tracking whether the chunk was a submission
+            // before executing.
+            const bool has_submit = work->HasSubmit();
             work->ExecuteAll(current_cmdbuf);
+
+            // If the chunk was a submission, reallocate the command buffer.
+            if (has_submit) {
+                AllocateWorkerCommandBuffer();
+            }
         }
-        if (has_submit) {
-            AllocateWorkerCommandBuffer();
+
+        {
+            std::scoped_lock rl{reserve_mutex};
+
+            // Recycle the chunk back to the reserve.
+            chunk_reserve.emplace_back(std::move(work));
         }
-        std::scoped_lock reserve_lock{reserve_mutex};
-        chunk_reserve.push_back(std::move(work));
-    } while (!stop_token.stop_requested());
+    }
 }
 
 void Scheduler::AllocateWorkerCommandBuffer() {
@@ -289,13 +321,16 @@ void Scheduler::EndRenderPass() {
 }
 
 void Scheduler::AcquireNewChunk() {
-    std::scoped_lock lock{reserve_mutex};
+    std::scoped_lock rl{reserve_mutex};
+
     if (chunk_reserve.empty()) {
+        // If we don't have anything reserved, we need to make a new chunk.
         chunk = std::make_unique<CommandChunk>();
-        return;
+    } else {
+        // Otherwise, we can just take from the reserve.
+        chunk = std::move(chunk_reserve.back());
+        chunk_reserve.pop_back();
     }
-    chunk = std::move(chunk_reserve.back());
-    chunk_reserve.pop_back();
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bd4cb0f7e..8d75ce987 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -232,10 +232,10 @@ private:
     std::queue<std::unique_ptr<CommandChunk>> work_queue;
     std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex execution_mutex;
     std::mutex reserve_mutex;
-    std::mutex work_mutex;
-    std::condition_variable_any work_cv;
-    std::condition_variable wait_cv;
+    std::mutex queue_mutex;
+    std::condition_variable_any event_cv;
     std::jthread worker_thread;
 };
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 08aa8ca33..5fc2b2fec 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -42,15 +42,15 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
                                        ComponentType blue, ComponentType alpha,
                                        bool is_srgb) noexcept {
     switch (Hash(format, red, green, blue, alpha, is_srgb)) {
-    case Hash(TextureFormat::A8R8G8B8, UNORM):
+    case Hash(TextureFormat::A8B8G8R8, UNORM):
         return PixelFormat::A8B8G8R8_UNORM;
-    case Hash(TextureFormat::A8R8G8B8, SNORM):
+    case Hash(TextureFormat::A8B8G8R8, SNORM):
         return PixelFormat::A8B8G8R8_SNORM;
-    case Hash(TextureFormat::A8R8G8B8, UINT):
+    case Hash(TextureFormat::A8B8G8R8, UINT):
         return PixelFormat::A8B8G8R8_UINT;
-    case Hash(TextureFormat::A8R8G8B8, SINT):
+    case Hash(TextureFormat::A8B8G8R8, SINT):
         return PixelFormat::A8B8G8R8_SINT;
-    case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+    case Hash(TextureFormat::A8B8G8R8, UNORM, SRGB):
         return PixelFormat::A8B8G8R8_SRGB;
     case Hash(TextureFormat::B5G6R5, UNORM):
         return PixelFormat::B5G6R5_UNORM;
@@ -74,13 +74,13 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::R8_UINT;
     case Hash(TextureFormat::R8, SINT):
         return PixelFormat::R8_SINT;
-    case Hash(TextureFormat::R8G8, UNORM):
+    case Hash(TextureFormat::G8R8, UNORM):
         return PixelFormat::R8G8_UNORM;
-    case Hash(TextureFormat::R8G8, SNORM):
+    case Hash(TextureFormat::G8R8, SNORM):
         return PixelFormat::R8G8_SNORM;
-    case Hash(TextureFormat::R8G8, UINT):
+    case Hash(TextureFormat::G8R8, UINT):
         return PixelFormat::R8G8_UINT;
-    case Hash(TextureFormat::R8G8, SINT):
+    case Hash(TextureFormat::G8R8, SINT):
         return PixelFormat::R8G8_SINT;
     case Hash(TextureFormat::R16G16B16A16, FLOAT):
         return PixelFormat::R16G16B16A16_FLOAT;
@@ -136,49 +136,49 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::R32_SINT;
     case Hash(TextureFormat::E5B9G9R9, FLOAT):
         return PixelFormat::E5B9G9R9_FLOAT;
-    case Hash(TextureFormat::D32, FLOAT):
+    case Hash(TextureFormat::Z32, FLOAT):
         return PixelFormat::D32_FLOAT;
-    case Hash(TextureFormat::D16, UNORM):
+    case Hash(TextureFormat::Z16, UNORM):
         return PixelFormat::D16_UNORM;
-    case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::Z24S8, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::S8D24, UINT, UNORM, UINT, UINT, LINEAR):
+    case Hash(TextureFormat::Z24S8, UINT, UNORM, UINT, UINT, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::G24R8, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
-    case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+    case Hash(TextureFormat::S8Z24, UNORM, UINT, UINT, UINT, LINEAR):
         return PixelFormat::D24_UNORM_S8_UINT;
-    case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::Z32_X24S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
         return PixelFormat::D32_FLOAT_S8_UINT;
-    case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+    case Hash(TextureFormat::R32B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
         return PixelFormat::D32_FLOAT_S8_UINT;
-    case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT1, UNORM, LINEAR):
         return PixelFormat::BC1_RGBA_UNORM;
-    case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+    case Hash(TextureFormat::DXT1, UNORM, SRGB):
         return PixelFormat::BC1_RGBA_SRGB;
-    case Hash(TextureFormat::BC2, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT23, UNORM, LINEAR):
         return PixelFormat::BC2_UNORM;
-    case Hash(TextureFormat::BC2, UNORM, SRGB):
+    case Hash(TextureFormat::DXT23, UNORM, SRGB):
         return PixelFormat::BC2_SRGB;
-    case Hash(TextureFormat::BC3, UNORM, LINEAR):
+    case Hash(TextureFormat::DXT45, UNORM, LINEAR):
         return PixelFormat::BC3_UNORM;
-    case Hash(TextureFormat::BC3, UNORM, SRGB):
+    case Hash(TextureFormat::DXT45, UNORM, SRGB):
         return PixelFormat::BC3_SRGB;
-    case Hash(TextureFormat::BC4, UNORM):
+    case Hash(TextureFormat::DXN1, UNORM):
         return PixelFormat::BC4_UNORM;
-    case Hash(TextureFormat::BC4, SNORM):
+    case Hash(TextureFormat::DXN1, SNORM):
         return PixelFormat::BC4_SNORM;
-    case Hash(TextureFormat::BC5, UNORM):
+    case Hash(TextureFormat::DXN2, UNORM):
         return PixelFormat::BC5_UNORM;
-    case Hash(TextureFormat::BC5, SNORM):
+    case Hash(TextureFormat::DXN2, SNORM):
         return PixelFormat::BC5_SNORM;
-    case Hash(TextureFormat::BC7, UNORM, LINEAR):
+    case Hash(TextureFormat::BC7U, UNORM, LINEAR):
         return PixelFormat::BC7_UNORM;
-    case Hash(TextureFormat::BC7, UNORM, SRGB):
+    case Hash(TextureFormat::BC7U, UNORM, SRGB):
         return PixelFormat::BC7_SRGB;
-    case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+    case Hash(TextureFormat::BC6H_S16, FLOAT):
         return PixelFormat::BC6H_SFLOAT;
-    case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+    case Hash(TextureFormat::BC6H_U16, FLOAT):
         return PixelFormat::BC6H_UFLOAT;
     case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
         return PixelFormat::ASTC_2D_4X4_UNORM;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c09eecd1a..ed5c768d8 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1176,13 +1176,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
-    std::vector<ImageId> overlap_ids;
+    boost::container::small_vector<ImageId, 4> overlap_ids;
     std::unordered_set<ImageId> overlaps_found;
-    std::vector<ImageId> left_aliased_ids;
-    std::vector<ImageId> right_aliased_ids;
+    boost::container::small_vector<ImageId, 4> left_aliased_ids;
+    boost::container::small_vector<ImageId, 4> right_aliased_ids;
     std::unordered_set<ImageId> ignore_textures;
-    std::vector<ImageId> bad_overlap_ids;
-    std::vector<ImageId> all_siblings;
+    boost::container::small_vector<ImageId, 4> bad_overlap_ids;
+    boost::container::small_vector<ImageId, 4> all_siblings;
     const bool this_is_linear = info.type == ImageType::Linear;
     const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
         if (True(overlap.flags & ImageFlagBits::Remapped)) {
@@ -1269,7 +1269,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }
@@ -1298,16 +1298,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         Image& overlap = slot_images[overlap_id];
         if (True(overlap.flags & ImageFlagBits::GpuModified)) {
             new_image.flags |= ImageFlagBits::GpuModified;
-        }
-        const auto& resolution = Settings::values.resolution_info;
-        const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
-        const u32 up_scale = can_rescale ? resolution.up_scale : 1;
-        const u32 down_shift = can_rescale ? resolution.down_shift : 0;
-        auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
-        if (overlap.info.num_samples != new_image.info.num_samples) {
-            runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
-        } else {
-            runtime.CopyImage(new_image, overlap, std::move(copies));
+            const auto& resolution = Settings::values.resolution_info;
+            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+            const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+            const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+            auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+            if (overlap.info.num_samples != new_image.info.num_samples) {
+                runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
+            } else {
+                runtime.CopyImage(new_image, overlap, std::move(copies));
+            }
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
             UntrackImage(overlap, overlap_id);
@@ -1616,37 +1616,38 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
         return;
     }
     auto& gpu_page_table = gpu_page_table_storage[*storage_id];
-    ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
+    ForEachGPUPage(gpu_addr, size,
+                   [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
+                       const auto it = gpu_page_table.find(page);
+                       if (it == gpu_page_table.end()) {
+                           if constexpr (BOOL_BREAK) {
+                               return false;
+                           } else {
+                               return;
+                           }
+                       }
+                       for (const ImageId image_id : it->second) {
+                           Image& image = slot_images[image_id];
+                           if (True(image.flags & ImageFlagBits::Picked)) {
+                               continue;
+                           }
+                           if (!image.OverlapsGPU(gpu_addr, size)) {
+                               continue;
+                           }
+                           image.flags |= ImageFlagBits::Picked;
+                           images.push_back(image_id);
+                           if constexpr (BOOL_BREAK) {
+                               if (func(image_id, image)) {
+                                   return true;
+                               }
+                           } else {
+                               func(image_id, image);
+                           }
+                       }
+                       if constexpr (BOOL_BREAK) {
+                           return false;
+                       }
+                   });
     for (const ImageId image_id : images) {
         slot_images[image_id].flags &= ~ImageFlagBits::Picked;
     }
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 26649aebf..4a80a59f9 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -14,7 +14,7 @@ namespace Tegra::Texture {
 
 namespace {
 
-constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
+[[maybe_unused]] constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
     0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
     0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
     0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
@@ -52,11 +52,13 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
 } // Anonymous namespace
 
 std::array<float, 4> TSCEntry::BorderColor() const noexcept {
-    if (!srgb_conversion) {
-        return border_color;
-    }
-    return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
-            SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+    // TODO: Handle SRGB correctly. Using this breaks shadows in some games (Xenoblade).
+    // if (!srgb_conversion) {
+    //     return border_color;
+    //}
+    // return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
+    //         SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+    return border_color;
 }
 
 float TSCEntry::MaxAnisotropy() const noexcept {
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7c4553a53..7e5837b20 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -15,26 +15,26 @@ enum class TextureFormat : u32 {
     R32G32B32 = 0x02,
     R16G16B16A16 = 0x03,
     R32G32 = 0x04,
-    R32_B24G8 = 0x05,
+    R32B24G8 = 0x05,
     ETC2_RGB = 0x06,
     X8B8G8R8 = 0x07,
-    A8R8G8B8 = 0x08,
+    A8B8G8R8 = 0x08,
     A2B10G10R10 = 0x09,
     ETC2_RGB_PTA = 0x0a,
     ETC2_RGBA = 0x0b,
     R16G16 = 0x0c,
-    R24G8 = 0x0d,
-    R8G24 = 0x0e,
+    G8R24 = 0x0d,
+    G24R8 = 0x0e,
     R32 = 0x0f,
-    BC6H_SFLOAT = 0x10,
-    BC6H_UFLOAT = 0x11,
+    BC6H_S16 = 0x10,
+    BC6H_U16 = 0x11,
     A4B4G4R4 = 0x12,
     A5B5G5R1 = 0x13,
     A1B5G5R5 = 0x14,
     B5G6R5 = 0x15,
     B6G5R5 = 0x16,
-    BC7 = 0x17,
-    R8G8 = 0x18,
+    BC7U = 0x17,
+    G8R8 = 0x18,
     EAC = 0x19,
     EACX2 = 0x1a,
     R16 = 0x1b,
@@ -46,33 +46,33 @@ enum class TextureFormat : u32 {
     B10G11R11 = 0x21,
     G8B8G8R8 = 0x22,
     B8G8R8G8 = 0x23,
-    BC1_RGBA = 0x24,
-    BC2 = 0x25,
-    BC3 = 0x26,
-    BC4 = 0x27,
-    BC5 = 0x28,
-    S8D24 = 0x29,
-    X8D24 = 0x2a,
-    D24S8 = 0x2b,
-    X4V4D24__COV4R4V = 0x2c,
-    X4V4D24__COV8R8V = 0x2d,
-    V8D24__COV4R12V = 0x2e,
-    D32 = 0x2f,
-    D32S8 = 0x30,
-    X8D24_X20V4S8__COV4R4V = 0x31,
-    X8D24_X20V4S8__COV8R8V = 0x32,
-    D32_X20V4X8__COV4R4V = 0x33,
-    D32_X20V4X8__COV8R8V = 0x34,
-    D32_X20V4S8__COV4R4V = 0x35,
-    D32_X20V4S8__COV8R8V = 0x36,
-    X8D24_X16V8S8__COV4R12V = 0x37,
-    D32_X16V8X8__COV4R12V = 0x38,
-    D32_X16V8S8__COV4R12V = 0x39,
-    D16 = 0x3a,
-    V8D24__COV8R24V = 0x3b,
-    X8D24_X16V8S8__COV8R24V = 0x3c,
-    D32_X16V8X8__COV8R24V = 0x3d,
-    D32_X16V8S8__COV8R24V = 0x3e,
+    DXT1 = 0x24,
+    DXT23 = 0x25,
+    DXT45 = 0x26,
+    DXN1 = 0x27,
+    DXN2 = 0x28,
+    Z24S8 = 0x29,
+    X8Z24 = 0x2a,
+    S8Z24 = 0x2b,
+    X4V4Z24__COV4R4V = 0x2c,
+    X4V4Z24__COV8R8V = 0x2d,
+    V8Z24__COV4R12V = 0x2e,
+    Z32 = 0x2f,
+    Z32_X24S8 = 0x30,
+    X8Z24_X20V4S8__COV4R4V = 0x31,
+    X8Z24_X20V4S8__COV8R8V = 0x32,
+    Z32_X20V4X8__COV4R4V = 0x33,
+    Z32_X20V4X8__COV8R8V = 0x34,
+    Z32_X20V4S8__COV4R4V = 0x35,
+    Z32_X20V4S8__COV8R8V = 0x36,
+    X8Z24_X16V8S8__COV4R12V = 0x37,
+    Z32_X16V8X8__COV4R12V = 0x38,
+    Z32_X16V8S8__COV4R12V = 0x39,
+    Z16 = 0x3a,
+    V8Z24__COV8R24V = 0x3b,
+    X8Z24_X16V8S8__COV8R24V = 0x3c,
+    Z32_X16V8X8__COV8R24V = 0x3d,
+    Z32_X16V8S8__COV8R24V = 0x3e,
     ASTC_2D_4X4 = 0x40,
     ASTC_2D_5X5 = 0x41,
     ASTC_2D_6X6 = 0x42,
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index fedb4a7bb..b42d48416 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -18,7 +18,7 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
     std::unique_ptr<Core::Frontend::GraphicsContext> context) {
     auto& telemetry_session = system.TelemetrySession();
-    auto& cpu_memory = system.Memory();
+    auto& cpu_memory = system.ApplicationMemory();
 
     switch (Settings::values.renderer_backend.GetValue()) {
     case Settings::RendererBackend::OpenGL:
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index df348af55..6f288b3f8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -401,6 +401,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
         }
     }
+    if (extensions.extended_dynamic_state3 && is_radv) {
+        LOG_WARNING(Render_Vulkan, "RADV has broken extendedDynamicState3ColorBlendEquation");
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+        dynamic_state3_blending = false;
+    }
     if (extensions.vertex_input_dynamic_state && is_radv) {
         // TODO(ameerj): Blacklist only offending driver versions
         // TODO(ameerj): Confirm if RDNA1 is affected
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 486d4dfaf..336f53700 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -375,6 +375,8 @@ const char* ToString(VkResult result) noexcept {
         return "VK_RESULT_MAX_ENUM";
     case VkResult::VK_ERROR_COMPRESSION_EXHAUSTED_EXT:
         return "VK_ERROR_COMPRESSION_EXHAUSTED_EXT";
+    case VkResult::VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT:
+        return "VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT";
     }
     return "Unknown";
 }
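
Illustration (not part of the commit): the vk_scheduler.cpp change above replaces the old work_mutex/work_cv/wait_cv trio with a queue_mutex that only guards the work queue, an execution_mutex that is held while a chunk executes, and a single event_cv, so that WaitWorker() can first drain the queue and then briefly take the execution mutex to know the in-flight chunk has also finished running. The sketch below is a hypothetical, stripped-down stand-in for that pattern, not yuzu's Scheduler: it uses plain C++20 facilities (std::jthread, std::condition_variable_any with a stop_token) in place of Common::CondvarWait and CommandChunk, and the Worker/Push/WaitIdle names are invented for the example.

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <stop_token>
#include <thread>
#include <utility>

class Worker {
public:
    Worker() : thread{[this](std::stop_token token) { Loop(token); }} {}

    void Push(std::function<void()> work) {
        {
            std::scoped_lock lock{queue_mutex};
            queue.push(std::move(work));
        }
        event_cv.notify_all();
    }

    void WaitIdle() {
        // First drain the queue; the worker notifies event_cv after every pop.
        {
            std::unique_lock lock{queue_mutex};
            event_cv.wait(lock, [this] { return queue.empty(); });
        }
        // Then take the execution mutex, which the worker holds while running an item,
        // so returning from here means the last popped item has finished as well.
        std::scoped_lock lock{execution_mutex};
    }

private:
    void Loop(std::stop_token token) {
        while (!token.stop_requested()) {
            std::function<void()> work;
            std::unique_lock lock{queue_mutex};
            // Wait for work or a stop request; notify waiters once an item is taken.
            const bool has_work = event_cv.wait(lock, token, [&] {
                if (queue.empty()) {
                    return false;
                }
                work = std::move(queue.front());
                queue.pop();
                event_cv.notify_all();
                return true;
            });
            if (!has_work) {
                return; // Stop requested while waiting.
            }
            // Grab the execution mutex before releasing the queue mutex, mirroring the
            // std::exchange trick in the scheduler change, then run the item outside
            // the queue lock.
            std::exchange(lock, std::unique_lock{execution_mutex});
            work();
        }
    }

    std::mutex queue_mutex;
    std::mutex execution_mutex;
    std::condition_variable_any event_cv;
    std::queue<std::function<void()>> queue;
    std::jthread thread;
};

The point of the two-mutex split is that producers calling Push() are never blocked behind a long-running work item, while WaitIdle() still has a cheap way to wait for execution, not just queue emptiness, to complete.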