7 files changed, 411 insertions, 72 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 40e6d1ec4..cb8b46edf 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -82,6 +82,7 @@ add_library(video_core STATIC
     gpu_thread.h
     memory_manager.cpp
     memory_manager.h
+    pte_kind.h
     query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 3909d36c1..4eb7a100d 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -56,66 +56,85 @@ void MaxwellDMA::Launch() {
     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
 
-    const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-    const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
-    if (!is_src_pitch && !is_dst_pitch) {
-        // If both the source and the destination are in block layout, assert.
-        UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
-        return;
-    }
+    if (launch.multi_line_enable) {
+        const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+        const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+        if (!is_src_pitch && !is_dst_pitch) {
+            // If both the source and the destination are in block layout, assert.
+            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+            return;
+        }
 
-    if (is_src_pitch && is_dst_pitch) {
-        CopyPitchToPitch();
+        if (is_src_pitch && is_dst_pitch) {
+            for (u32 line = 0; line < regs.line_count; ++line) {
+                const GPUVAddr source_line =
+                    regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+                const GPUVAddr dest_line =
+                    regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+                memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+            }
+        } else {
+            if (!is_src_pitch && is_dst_pitch) {
+                CopyBlockLinearToPitch();
+            } else {
+                CopyPitchToBlockLinear();
+            }
+        }
     } else {
-        ASSERT(launch.multi_line_enable == 1);
-
-        if (!is_src_pitch && is_dst_pitch) {
-            CopyBlockLinearToPitch();
+        // TODO: allow multisized components.
+        auto& accelerate = rasterizer->AccessAccelerateDMA();
+        const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+        if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
+            ASSERT(regs.remap_const.component_size_minus_one == 3);
+            accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+            std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+            memory_manager.WriteBlockUnsafe(regs.offset_out,
+                                            reinterpret_cast<u8*>(tmp_buffer.data()),
+                                            regs.line_length_in * sizeof(u32));
         } else {
-            CopyPitchToBlockLinear();
+            auto convert_linear_2_blocklinear_addr = [](u64 address) {
+                return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
+                       ((address & 0x180) >> 1) | ((address & 0x20) << 3);
+            };
+            auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+            auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+            const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
+            const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+            if (!is_src_pitch && is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[offset] =
+                        tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
+                                   regs.offset_in];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else if (is_src_pitch && !is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
+                               regs.offset_out] = tmp_buffer[offset];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else {
+                if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+                    std::vector<u8> tmp_buffer(regs.line_length_in);
+                    memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                                   regs.line_length_in);
+                    memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
+                                              regs.line_length_in);
+                }
+            }
         }
     }
-    ReleaseSemaphore();
-}
 
-void MaxwellDMA::CopyPitchToPitch() {
-    // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions
-    // (line_length_in, line_count).
-    // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in.
-    const bool remap_enabled = regs.launch_dma.remap_enable != 0;
-    if (regs.launch_dma.multi_line_enable) {
-        UNIMPLEMENTED_IF(remap_enabled);
-
-        // Perform a line-by-line copy.
-        // We're going to take a subrect of size (line_length_in, line_count) from the source
-        // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock
-        // does that for us.
-        for (u32 line = 0; line < regs.line_count; ++line) {
-            const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
-            const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
-            memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
-        }
-        return;
-    }
-    // TODO: allow multisized components.
-    auto& accelerate = rasterizer->AccessAccelerateDMA();
-    const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
-    const bool is_buffer_clear = remap_enabled && is_const_a_dst;
-    if (is_buffer_clear) {
-        ASSERT(regs.remap_const.component_size_minus_one == 3);
-        accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
-        std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
-        memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
-                                        regs.line_length_in * sizeof(u32));
-        return;
-    }
-    UNIMPLEMENTED_IF(remap_enabled);
-    if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
-        std::vector<u8> tmp_buffer(regs.line_length_in);
-        memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
-        memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
-    }
+    ReleaseSemaphore();
 }
 
 void MaxwellDMA::CopyBlockLinearToPitch() {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index bc48320ce..953e34adc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -219,8 +219,6 @@ private:
     /// registers.
     void Launch();
 
-    void CopyPitchToPitch();
-
     void CopyBlockLinearToPitch();
 
     void CopyPitchToBlockLinear();
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index cca401c74..d07b21bd6 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     big_entries.resize(big_page_table_size / 32, 0);
     big_page_table_cpu.resize(big_page_table_size);
     big_page_continous.resize(big_page_table_size / continous_bits, 0);
+    std::array<PTEKind, 32> invalid_kinds; // One group of 32 kinds; every slot starts INVALID.
+    invalid_kinds.fill(PTEKind::INVALID);
+    big_kinds.resize(big_page_table_size / 32, invalid_kinds);
     entries.resize(page_table_size / 32, 0);
+    kinds.resize(page_table_size / 32, invalid_kinds); // Small-page table: size it like `entries`.
 }
 
 MemoryManager::~MemoryManager() = default;
@@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
     }
 }
 
+/// Looks up the PTE kind recorded for `gpu_addr`.
+/// The big-page table is consulted when its entry for the address is Mapped or Reserved;
+/// in every other case the small-page table is consulted instead.
+PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
+    const EntryType big_entry = GetEntry<true>(gpu_addr);
+    const bool use_big_table = big_entry == EntryType::Mapped || big_entry == EntryType::Reserved;
+    return use_big_table ? GetKind<true>(gpu_addr) : GetKind<false>(gpu_addr);
+}
+
+/// Reads the PTE kind recorded for the page that contains `position`.
+/// `is_big_page` selects the table (`big_kinds` vs. `kinds`) and the matching page shift
+/// (`big_page_bits` vs. `page_bits`). Each vector element holds a group of 32 kinds.
+template <bool is_big_page>
+PTEKind MemoryManager::GetKind(size_t position) const {
+    constexpr size_t kinds_per_group = 32;
+    const auto& table = is_big_page ? big_kinds : kinds;
+    const size_t page_index = position >> (is_big_page ? big_page_bits : page_bits);
+    const size_t group = page_index / kinds_per_group;
+    const size_t slot = page_index % kinds_per_group;
+    return table[group][slot];
+}
+
+/// Stores `kind` for the page that contains `position`.
+/// `is_big_page` selects the table (`big_kinds` vs. `kinds`) and the matching page shift
+/// (`big_page_bits` vs. `page_bits`). Each vector element holds a group of 32 kinds.
+template <bool is_big_page>
+void MemoryManager::SetKind(size_t position, PTEKind kind) {
+    constexpr size_t kinds_per_group = 32;
+    auto& table = is_big_page ? big_kinds : kinds;
+    const size_t page_index = position >> (is_big_page ? big_page_bits : page_bits);
+    const size_t group = page_index / kinds_per_group;
+    const size_t slot = page_index % kinds_per_group;
+    table[group][slot] = kind;
+}
+
 inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
     const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
     const size_t sub_index = big_page_index % continous_bits;
@@ -92,8 +131,8 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value
 }
 
 template <MemoryManager::EntryType entry_type>
-GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
-                                    size_t size) {
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                                    PTEKind kind) {
     u64 remaining_size{size};
     if constexpr (entry_type == EntryType::Mapped) {
         page_table.ReserveRange(gpu_addr, size);
@@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
         SetEntry<false>(current_gpu_addr, entry_type);
+        SetKind<false>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
@@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
 
 template <MemoryManager::EntryType entry_type>
 GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
-                                       size_t size) {
+                                       size_t size, PTEKind kind) {
     u64 remaining_size{size};
     for (u64 offset{}; offset < size; offset += big_page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
         SetEntry<true>(current_gpu_addr, entry_type);
+        SetKind<true>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
         }
@@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
     rasterizer = rasterizer_;
 }
 
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
                             bool is_big_pages) {
     if (is_big_pages) [[likely]] {
-        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
     }
-    return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+    return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
 }
 
 GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
     if (is_big_pages) [[likely]] {
-        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
     }
-    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
 }
 
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
@@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }
 
-    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
-    PageTableOp<EntryType::Free>(gpu_addr, 0, size);
+    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
+    PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index f992e29f3..ab4bc9ec6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
 #include "common/virtual_buffer.h"
+#include "video_core/pte_kind.h"
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -98,7 +99,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;
 
-    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+                 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
@@ -114,6 +116,8 @@ public:
         return gpu_addr < address_space_size;
     }
 
+    PTEKind GetPageKind(GPUVAddr gpu_addr) const;
+
 private:
     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
@@ -166,10 +170,12 @@ private:
     std::vector<u64> big_entries;
 
     template <EntryType entry_type>
-    GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+    GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                         PTEKind kind);
 
     template <EntryType entry_type>
-    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                            PTEKind kind);
 
     template <bool is_big_page>
     inline EntryType GetEntry(size_t position) const;
@@ -177,6 +183,15 @@ private:
     template <bool is_big_page>
     inline void SetEntry(size_t position, EntryType entry);
 
+    std::vector<std::array<PTEKind, 32>> kinds;
+    std::vector<std::array<PTEKind, 32>> big_kinds;
+
+    template <bool is_big_page>
+    inline PTEKind GetKind(size_t position) const;
+
+    template <bool is_big_page>
+    inline void SetKind(size_t position, PTEKind kind);
+
     Common::MultiLevelPageTable<u32> page_table;
     Common::VirtualBuffer<u32> big_page_table_cpu;
 
diff --git a/src/video_core/pte_kind.h b/src/video_core/pte_kind.h
new file mode 100644
index 000000000..591d7214b
--- /dev/null
+++ b/src/video_core/pte_kind.h
@@ -0,0 +1,264 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt
+enum class PTEKind : u8 { // Page "kind" tag; enumerators are not sorted by numeric value.
+    INVALID = 0xff, // Default kind in MemoryManager::Map; also set for sparse/unmapped ranges.
+    PITCH = 0x00, // Counted as a pitch kind by IsPitchKind.
+    Z16 = 0x01,
+    Z16_2C = 0x02,
+    Z16_MS2_2C = 0x03,
+    Z16_MS4_2C = 0x04,
+    Z16_MS8_2C = 0x05,
+    Z16_MS16_2C = 0x06,
+    Z16_2Z = 0x07,
+    Z16_MS2_2Z = 0x08,
+    Z16_MS4_2Z = 0x09,
+    Z16_MS8_2Z = 0x0a,
+    Z16_MS16_2Z = 0x0b,
+    Z16_2CZ = 0x36,
+    Z16_MS2_2CZ = 0x37,
+    Z16_MS4_2CZ = 0x38,
+    Z16_MS8_2CZ = 0x39,
+    Z16_MS16_2CZ = 0x5f,
+    Z16_4CZ = 0x0c,
+    Z16_MS2_4CZ = 0x0d,
+    Z16_MS4_4CZ = 0x0e,
+    Z16_MS8_4CZ = 0x0f,
+    Z16_MS16_4CZ = 0x10,
+    S8Z24 = 0x11,
+    S8Z24_1Z = 0x12,
+    S8Z24_MS2_1Z = 0x13,
+    S8Z24_MS4_1Z = 0x14,
+    S8Z24_MS8_1Z = 0x15,
+    S8Z24_MS16_1Z = 0x16,
+    S8Z24_2CZ = 0x17,
+    S8Z24_MS2_2CZ = 0x18,
+    S8Z24_MS4_2CZ = 0x19,
+    S8Z24_MS8_2CZ = 0x1a,
+    S8Z24_MS16_2CZ = 0x1b,
+    S8Z24_2CS = 0x1c,
+    S8Z24_MS2_2CS = 0x1d,
+    S8Z24_MS4_2CS = 0x1e,
+    S8Z24_MS8_2CS = 0x1f,
+    S8Z24_MS16_2CS = 0x20,
+    S8Z24_4CSZV = 0x21,
+    S8Z24_MS2_4CSZV = 0x22,
+    S8Z24_MS4_4CSZV = 0x23,
+    S8Z24_MS8_4CSZV = 0x24,
+    S8Z24_MS16_4CSZV = 0x25,
+    V8Z24_MS4_VC12 = 0x26,
+    V8Z24_MS4_VC4 = 0x27,
+    V8Z24_MS8_VC8 = 0x28,
+    V8Z24_MS8_VC24 = 0x29,
+    V8Z24_MS4_VC12_1ZV = 0x2e,
+    V8Z24_MS4_VC4_1ZV = 0x2f,
+    V8Z24_MS8_VC8_1ZV = 0x30,
+    V8Z24_MS8_VC24_1ZV = 0x31,
+    V8Z24_MS4_VC12_2CS = 0x32,
+    V8Z24_MS4_VC4_2CS = 0x33,
+    V8Z24_MS8_VC8_2CS = 0x34,
+    V8Z24_MS8_VC24_2CS = 0x35,
+    V8Z24_MS4_VC12_2CZV = 0x3a,
+    V8Z24_MS4_VC4_2CZV = 0x3b,
+    V8Z24_MS8_VC8_2CZV = 0x3c,
+    V8Z24_MS8_VC24_2CZV = 0x3d,
+    V8Z24_MS4_VC12_2ZV = 0x3e,
+    V8Z24_MS4_VC4_2ZV = 0x3f,
+    V8Z24_MS8_VC8_2ZV = 0x40,
+    V8Z24_MS8_VC24_2ZV = 0x41,
+    V8Z24_MS4_VC12_4CSZV = 0x42,
+    V8Z24_MS4_VC4_4CSZV = 0x43,
+    V8Z24_MS8_VC8_4CSZV = 0x44,
+    V8Z24_MS8_VC24_4CSZV = 0x45,
+    Z24S8 = 0x46,
+    Z24S8_1Z = 0x47,
+    Z24S8_MS2_1Z = 0x48,
+    Z24S8_MS4_1Z = 0x49,
+    Z24S8_MS8_1Z = 0x4a,
+    Z24S8_MS16_1Z = 0x4b,
+    Z24S8_2CS = 0x4c,
+    Z24S8_MS2_2CS = 0x4d,
+    Z24S8_MS4_2CS = 0x4e,
+    Z24S8_MS8_2CS = 0x4f,
+    Z24S8_MS16_2CS = 0x50,
+    Z24S8_2CZ = 0x51,
+    Z24S8_MS2_2CZ = 0x52,
+    Z24S8_MS4_2CZ = 0x53,
+    Z24S8_MS8_2CZ = 0x54,
+    Z24S8_MS16_2CZ = 0x55,
+    Z24S8_4CSZV = 0x56,
+    Z24S8_MS2_4CSZV = 0x57,
+    Z24S8_MS4_4CSZV = 0x58,
+    Z24S8_MS8_4CSZV = 0x59,
+    Z24S8_MS16_4CSZV = 0x5a,
+    Z24V8_MS4_VC12 = 0x5b,
+    Z24V8_MS4_VC4 = 0x5c,
+    Z24V8_MS8_VC8 = 0x5d,
+    Z24V8_MS8_VC24 = 0x5e,
+    YUV_B8C1_2Y = 0x60,
+    YUV_B8C2_2Y = 0x61,
+    YUV_B10C1_2Y = 0x62,
+    YUV_B10C2_2Y = 0x6b,
+    YUV_B12C1_2Y = 0x6c,
+    YUV_B12C2_2Y = 0x6d,
+    Z24V8_MS4_VC12_1ZV = 0x63,
+    Z24V8_MS4_VC4_1ZV = 0x64,
+    Z24V8_MS8_VC8_1ZV = 0x65,
+    Z24V8_MS8_VC24_1ZV = 0x66,
+    Z24V8_MS4_VC12_2CS = 0x67,
+    Z24V8_MS4_VC4_2CS = 0x68,
+    Z24V8_MS8_VC8_2CS = 0x69,
+    Z24V8_MS8_VC24_2CS = 0x6a,
+    Z24V8_MS4_VC12_2CZV = 0x6f,
+    Z24V8_MS4_VC4_2CZV = 0x70,
+    Z24V8_MS8_VC8_2CZV = 0x71,
+    Z24V8_MS8_VC24_2CZV = 0x72,
+    Z24V8_MS4_VC12_2ZV = 0x73,
+    Z24V8_MS4_VC4_2ZV = 0x74,
+    Z24V8_MS8_VC8_2ZV = 0x75,
+    Z24V8_MS8_VC24_2ZV = 0x76,
+    Z24V8_MS4_VC12_4CSZV = 0x77,
+    Z24V8_MS4_VC4_4CSZV = 0x78,
+    Z24V8_MS8_VC8_4CSZV = 0x79,
+    Z24V8_MS8_VC24_4CSZV = 0x7a,
+    ZF32 = 0x7b,
+    ZF32_1Z = 0x7c,
+    ZF32_MS2_1Z = 0x7d,
+    ZF32_MS4_1Z = 0x7e,
+    ZF32_MS8_1Z = 0x7f,
+    ZF32_MS16_1Z = 0x80,
+    ZF32_2CS = 0x81,
+    ZF32_MS2_2CS = 0x82,
+    ZF32_MS4_2CS = 0x83,
+    ZF32_MS8_2CS = 0x84,
+    ZF32_MS16_2CS = 0x85,
+    ZF32_2CZ = 0x86,
+    ZF32_MS2_2CZ = 0x87,
+    ZF32_MS4_2CZ = 0x88,
+    ZF32_MS8_2CZ = 0x89,
+    ZF32_MS16_2CZ = 0x8a,
+    X8Z24_X16V8S8_MS4_VC12 = 0x8b,
+    X8Z24_X16V8S8_MS4_VC4 = 0x8c,
+    X8Z24_X16V8S8_MS8_VC8 = 0x8d,
+    X8Z24_X16V8S8_MS8_VC24 = 0x8e,
+    X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f,
+    X8Z24_X16V8S8_MS4_VC4_1CS = 0x90,
+    X8Z24_X16V8S8_MS8_VC8_1CS = 0x91,
+    X8Z24_X16V8S8_MS8_VC24_1CS = 0x92,
+    X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97,
+    X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98,
+    X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99,
+    X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a,
+    X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b,
+    X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c,
+    X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d,
+    X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e,
+    X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f,
+    X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0,
+    X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1,
+    X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2,
+    X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3,
+    X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4,
+    X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5,
+    X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6,
+    ZF32_X16V8S8_MS4_VC12 = 0xa7,
+    ZF32_X16V8S8_MS4_VC4 = 0xa8,
+    ZF32_X16V8S8_MS8_VC8 = 0xa9,
+    ZF32_X16V8S8_MS8_VC24 = 0xaa,
+    ZF32_X16V8S8_MS4_VC12_1CS = 0xab,
+    ZF32_X16V8S8_MS4_VC4_1CS = 0xac,
+    ZF32_X16V8S8_MS8_VC8_1CS = 0xad,
+    ZF32_X16V8S8_MS8_VC24_1CS = 0xae,
+    ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3,
+    ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4,
+    ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5,
+    ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6,
+    ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7,
+    ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8,
+    ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9,
+    ZF32_X16V8S8_MS8_VC24_1CZV = 0xba,
+    ZF32_X16V8S8_MS4_VC12_2CS = 0xbb,
+    ZF32_X16V8S8_MS4_VC4_2CS = 0xbc,
+    ZF32_X16V8S8_MS8_VC8_2CS = 0xbd,
+    ZF32_X16V8S8_MS8_VC24_2CS = 0xbe,
+    ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf,
+    ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0,
+    ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1,
+    ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2,
+    ZF32_X24S8 = 0xc3,
+    ZF32_X24S8_1CS = 0xc4,
+    ZF32_X24S8_MS2_1CS = 0xc5,
+    ZF32_X24S8_MS4_1CS = 0xc6,
+    ZF32_X24S8_MS8_1CS = 0xc7,
+    ZF32_X24S8_MS16_1CS = 0xc8,
+    ZF32_X24S8_2CSZV = 0xce,
+    ZF32_X24S8_MS2_2CSZV = 0xcf,
+    ZF32_X24S8_MS4_2CSZV = 0xd0,
+    ZF32_X24S8_MS8_2CSZV = 0xd1,
+    ZF32_X24S8_MS16_2CSZV = 0xd2,
+    ZF32_X24S8_2CS = 0xd3,
+    ZF32_X24S8_MS2_2CS = 0xd4,
+    ZF32_X24S8_MS4_2CS = 0xd5,
+    ZF32_X24S8_MS8_2CS = 0xd6,
+    ZF32_X24S8_MS16_2CS = 0xd7,
+    S8 = 0x2a,
+    S8_2S = 0x2b,
+    GENERIC_16BX2 = 0xfe,
+    C32_2C = 0xd8,
+    C32_2CBR = 0xd9,
+    C32_2CBA = 0xda,
+    C32_2CRA = 0xdb,
+    C32_2BRA = 0xdc,
+    C32_MS2_2C = 0xdd,
+    C32_MS2_2CBR = 0xde,
+    C32_MS2_4CBRA = 0xcc,
+    C32_MS4_2C = 0xdf,
+    C32_MS4_2CBR = 0xe0,
+    C32_MS4_2CBA = 0xe1,
+    C32_MS4_2CRA = 0xe2,
+    C32_MS4_2BRA = 0xe3,
+    C32_MS4_4CBRA = 0x2c,
+    C32_MS8_MS16_2C = 0xe4,
+    C32_MS8_MS16_2CRA = 0xe5,
+    C64_2C = 0xe6,
+    C64_2CBR = 0xe7,
+    C64_2CBA = 0xe8,
+    C64_2CRA = 0xe9,
+    C64_2BRA = 0xea,
+    C64_MS2_2C = 0xeb,
+    C64_MS2_2CBR = 0xec,
+    C64_MS2_4CBRA = 0xcd,
+    C64_MS4_2C = 0xed,
+    C64_MS4_2CBR = 0xee,
+    C64_MS4_2CBA = 0xef,
+    C64_MS4_2CRA = 0xf0,
+    C64_MS4_2BRA = 0xf1,
+    C64_MS4_4CBRA = 0x2d,
+    C64_MS8_MS16_2C = 0xf2,
+    C64_MS8_MS16_2CRA = 0xf3,
+    C128_2C = 0xf4,
+    C128_2CR = 0xf5,
+    C128_MS2_2C = 0xf6,
+    C128_MS2_2CR = 0xf7,
+    C128_MS4_2C = 0xf8,
+    C128_MS4_2CR = 0xf9,
+    C128_MS8_MS16_2C = 0xfa,
+    C128_MS8_MS16_2CR = 0xfb,
+    X8C24 = 0xfc,
+    PITCH_NO_SWIZZLE = 0xfd, // Counted as a pitch kind by IsPitchKind.
+    SMSKED_MESSAGE = 0xca,
+    SMHOST_MESSAGE = 0xcb,
+};
+/// Returns true only when `kind` is PTEKind::PITCH or PTEKind::PITCH_NO_SWIZZLE.
+constexpr bool IsPitchKind(PTEKind kind) {
+    return kind == PTEKind::PITCH_NO_SWIZZLE || kind == PTEKind::PITCH;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cb02631c..4b15c0f85 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -59,10 +59,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
         std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
             return query_pool == *pool;
         });
-    ASSERT(it != std::end(pools));
 
-    const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
-    usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+    if (it != std::end(pools)) {
+        const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
+        usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+    }
 }
 
 QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,