summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/buffer_cache/buffer_base.h14
-rw-r--r--src/video_core/engines/engine_upload.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp7
-rw-r--r--src/video_core/engines/maxwell_dma.cpp21
-rw-r--r--src/video_core/invalidation_accumulator.h79
-rw-r--r--src/video_core/macro/macro_hle.cpp42
-rw-r--r--src/video_core/memory_manager.cpp102
-rw-r--r--src/video_core/memory_manager.h18
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp42
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp68
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h19
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp128
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp3
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp14
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp27
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h24
29 files changed, 509 insertions, 179 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 537451b02..f617665de 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
+ invalidation_accumulator.h
memory_manager.cpp
memory_manager.h
precompiled_headers.h
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 92d77eef2..c47b7d866 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
- u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
- const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+ const u64 word = current_word;
u64 page = page_begin;
page_begin = 0;
@@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- const u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
- const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
- const u64 word = state_words[word_index] & ~off_word;
+ const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
@@ -564,7 +563,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
- const u64* const untracked_words = Array<Type::Untracked>();
+ [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
- const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
- const u64 word = state_words[word_index] & ~off_word;
+ const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index cea1dd8b0..7f5a0c29d 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
x_elements, regs.line_count, regs.dest.BlockHeight(),
regs.dest.BlockDepth(), regs.line_length_in);
- memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+ memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
}
}
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index e655e7254..a126c359c 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/sw_blitter/blitter.h"
+#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -20,8 +21,8 @@ namespace Tegra::Engines {
using namespace Texture;
-Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
- sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
+Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
+ sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
@@ -104,6 +105,7 @@ void Fermi2D::Blit() {
config.src_x0 = 0;
}
+ memory_manager.FlushCaching();
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
sw_blitter->Blit(src, regs.dst, config);
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 523fbdec2..705b323e1 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -305,6 +305,7 @@ public:
private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
+ MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fbfd1ddd2..97f547789 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
void Maxwell3D::ProcessQueryGet() {
- // TODO(Subv): Support the other query units.
- if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
- LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
- }
-
switch (regs.report_semaphore.query.operation) {
case Regs::ReportSemaphore::Operation::Release:
if (regs.report_semaphore.query.short_query != 0) {
@@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
const size_t copy_size = amount * sizeof(u32);
- memory_manager.WriteBlock(address, start_base, copy_size);
+ memory_manager.WriteBlockCached(address, start_base, copy_size);
// Increment the current buffer position.
regs.const_buffer.offset += static_cast<u32>(copy_size);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01f70ea9e..7762c7d96 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
if (launch.multi_line_enable) {
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
+ memory_manager.FlushCaching();
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
CopyBlockLinearToBlockLinear();
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
reinterpret_cast<u8*>(tmp_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
+ memory_manager.FlushCaching();
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -121,8 +122,8 @@ void MaxwellDMA::Launch() {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size());
- memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
+ tmp_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
tmp_buffer.size());
- memory_manager.WriteBlock(
+ memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
tmp_buffer.data(), tmp_buffer.size());
}
@@ -141,8 +142,8 @@ void MaxwellDMA::Launch() {
std::vector<u8> tmp_buffer(regs.line_length_in);
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
regs.line_length_in);
- memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
- regs.line_length_in);
+ memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+ regs.line_length_in);
}
}
}
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_out);
- memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+ memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_in);
- memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+ memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
regs.src_params.block_size.height, regs.src_params.block_size.depth,
regs.pitch_out);
- memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+ memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
dst.block_size.height, dst.block_size.depth, pitch);
- memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+ memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h
new file mode 100644
index 000000000..2c2aaf7bb
--- /dev/null
+++ b/src/video_core/invalidation_accumulator.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class InvalidationAccumulator {
+public:
+ InvalidationAccumulator() = default;
+ ~InvalidationAccumulator() = default;
+
+ void Add(GPUVAddr address, size_t size) {
+ const auto reset_values = [&]() {
+ if (has_collected) {
+ buffer.emplace_back(start_address, accumulated_size);
+ }
+ start_address = address;
+ accumulated_size = size;
+ last_collection = start_address + size;
+ };
+ if (address >= start_address && address + size <= last_collection) [[likely]] {
+ return;
+ }
+ size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
+ address = address & atomicity_mask;
+ if (!has_collected) [[unlikely]] {
+ reset_values();
+ has_collected = true;
+ return;
+ }
+ if (address != last_collection) [[unlikely]] {
+ reset_values();
+ return;
+ }
+ accumulated_size += size;
+ last_collection += size;
+ }
+
+ void Clear() {
+ buffer.clear();
+ start_address = 0;
+ last_collection = 0;
+ has_collected = false;
+ }
+
+ bool AnyAccumulated() const {
+ return has_collected;
+ }
+
+ template <typename Func>
+ void Callback(Func&& func) {
+ if (!has_collected) {
+ return;
+ }
+ buffer.emplace_back(start_address, accumulated_size);
+ for (auto& [address, size] : buffer) {
+ func(address, size);
+ }
+ }
+
+private:
+ static constexpr size_t atomicity_bits = 5;
+ static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
+ static constexpr size_t atomicity_size_mask = atomicity_size - 1;
+ static constexpr size_t atomicity_mask = ~atomicity_size_mask;
+ GPUVAddr start_address{};
+ GPUVAddr last_collection{};
+ size_t accumulated_size{};
+ bool has_collected{};
+ std::vector<std::pair<VAddr, size_t>> buffer;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index a5476e795..6272a4652 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -50,38 +50,6 @@ protected:
Maxwell3D& maxwell3d;
};
-class HLE_DrawArrays final : public HLEMacroImpl {
-public:
- explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
-
- auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
- maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
- maxwell3d.regs.global_base_instance_index, 1);
- }
-};
-
-class HLE_DrawIndexed final : public HLEMacroImpl {
-public:
- explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
- maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
- maxwell3d.regs.index_buffer.format =
- static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
-
- auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
- maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
- maxwell3d.regs.global_base_vertex_index,
- maxwell3d.regs.global_base_instance_index, 1);
- }
-};
-
/*
* @note: these macros have two versions, a normal and extended version, with the extended version
* also assigning the base vertex/instance.
@@ -497,11 +465,6 @@ public:
} // Anonymous namespace
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
- builders.emplace(0xDD6A7FA92A7D2674ULL,
- std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
- [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
- return std::make_unique<HLE_DrawArrays>(maxwell3d__);
- }));
builders.emplace(0x0D61FC9FAAC9FCADULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
@@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
}));
- builders.emplace(0x2DB33AADB741839CULL,
- std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
- [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
- return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
- }));
builders.emplace(0x771BB18C62444DA0ULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3a5cdeb39..3bcae3503 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -6,11 +6,13 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
+#include "video_core/invalidation_accumulator.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
page_bits != big_page_bits ? page_bits : 0},
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
- 1, std::memory_order_acq_rel)} {
+ 1, std::memory_order_acq_rel)},
+ accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
address_space_size = 1ULL << address_space_bits;
page_size = 1ULL << page_bits;
page_mask = page_size - 1ULL;
@@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
entries.resize(page_table_size / 32, 0);
+ if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
+ fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+ } else {
+ fastmem_arena = nullptr;
+ }
}
MemoryManager::~MemoryManager() = default;
@@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
if (size == 0) {
return;
}
- const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
-
- for (const auto& [map_addr, map_size] : submapped_ranges) {
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
- ASSERT(cpu_addr);
+ GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
- rasterizer->UnmapMemory(*cpu_addr, map_size);
+ for (const auto& [map_addr, map_size] : page_stash) {
+ rasterizer->UnmapMemory(map_addr, map_size);
}
+ page_stash.clear();
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
}
}
-template <bool is_safe>
+template <bool is_safe, bool use_fastmem>
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
[[maybe_unused]] VideoCommon::CacheType which) const {
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
- u8* physical = memory.GetPointer(cpu_addr_base);
- std::memcpy(dest_buffer, physical, copy_amount);
+ if constexpr (use_fastmem) {
+ std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
+ } else {
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
+ }
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
- if (!IsBigPageContinous(page_index)) [[unlikely]] {
- memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+ if constexpr (use_fastmem) {
+ std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
} else {
- u8* physical = memory.GetPointer(cpu_addr_base);
- std::memcpy(dest_buffer, physical, copy_amount);
+ if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+ } else {
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
+ }
}
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
@@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const {
- ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
+ if (fastmem_arena) [[likely]] {
+ ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
+ return;
+ }
+ ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
- ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
+ if (fastmem_arena) [[likely]] {
+ ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
+ return;
+ }
+ ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
}
template <bool is_safe>
@@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
}
+void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
+ std::size_t size) {
+ WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
+ accumulator->Add(gpu_dest_addr, size);
+}
+
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
VideoCommon::CacheType which) const {
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
GPUVAddr gpu_addr, std::size_t size) const {
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
- std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+ GetSubmappedRangeImpl<true>(gpu_addr, size, result);
+ return result;
+}
+
+template <bool is_gpu_address>
+void MemoryManager::GetSubmappedRangeImpl(
+ GPUVAddr gpu_addr, std::size_t size,
+ std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+ result) const {
+ std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
+ last_segment{};
std::optional<VAddr> old_page_addr{};
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
[[maybe_unused]] std::size_t offset,
@@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
- const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
- last_segment = {new_base_addr, copy_amount};
+ if constexpr (is_gpu_address) {
+ const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+ last_segment = {new_base_addr, copy_amount};
+ } else {
+ last_segment = {cpu_addr_base, copy_amount};
+ }
} else {
last_segment->second += copy_amount;
}
@@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
- const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
- last_segment = {new_base_addr, copy_amount};
+ if constexpr (is_gpu_address) {
+ const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+ last_segment = {new_base_addr, copy_amount};
+ } else {
+ last_segment = {cpu_addr_base, copy_amount};
+ }
} else {
last_segment->second += copy_amount;
}
@@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
};
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
split(0, 0, 0);
- return result;
+}
+
+void MemoryManager::FlushCaching() {
+ if (!accumulator->AnyAccumulated()) {
+ return;
+ }
+ accumulator->Callback([this](GPUVAddr addr, size_t size) {
+ GetSubmappedRangeImpl<false>(addr, size, page_stash);
+ });
+ rasterizer->InnerInvalidation(page_stash);
+ page_stash.clear();
+ accumulator->Clear();
}
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 828e13439..2936364f0 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -19,6 +19,10 @@ namespace VideoCore {
class RasterizerInterface;
}
+namespace VideoCommon {
+class InvalidationAccumulator;
+}
+
namespace Core {
class DeviceMemory;
namespace Memory {
@@ -80,6 +84,7 @@ public:
*/
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
* Checks if a gpu region can be simply read with a pointer.
@@ -129,12 +134,14 @@ public:
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
size_t max_size = std::numeric_limits<size_t>::max()) const;
+ void FlushCaching();
+
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
- template <bool is_safe>
+ template <bool is_safe, bool use_fastmem>
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const;
@@ -154,6 +161,12 @@ private:
inline bool IsBigPageContinous(size_t big_page_index) const;
inline void SetBigPageContinous(size_t big_page_index, bool value);
+ template <bool is_gpu_address>
+ void GetSubmappedRangeImpl(
+ GPUVAddr gpu_addr, std::size_t size,
+ std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+ result) const;
+
Core::System& system;
Core::Memory::Memory& memory;
Core::DeviceMemory& device_memory;
@@ -201,10 +214,13 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continous;
+ std::vector<std::pair<VAddr, std::size_t>> page_stash{};
+ u8* fastmem_arena{};
constexpr static size_t continous_bits = 64;
const size_t unique_identifier;
+ std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator;
};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f44c7df50..1735b6164 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
#include <functional>
#include <optional>
#include <span>
+#include <utility>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/cache_types.h"
@@ -95,6 +96,12 @@ public:
virtual void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
+ virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
+ for (const auto& [cpu_addr, size] : sequences) {
+ InvalidateRegion(cpu_addr, size);
+ }
+ }
+
/// Notify rasterizer that any caches of the specified region are desync with guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 3d328a250..f8398b511 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -148,7 +148,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
});
}
if (!extended_dynamic_state_2_extra) {
- dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2);
+ dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
}
if (!extended_dynamic_state_3_blend) {
if (maxwell3d.dirty.flags[Dirty::Blending]) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2f3d1c934..1578cb206 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -99,7 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
- true, Settings::values.renderer_debug.GetValue())),
+ Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 487d8b416..b0153a502 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -336,6 +336,9 @@ void BufferCacheRuntime::Finish() {
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
+ return;
+ }
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -394,6 +397,9 @@ void BufferCacheRuntime::PostCopyBarrier() {
}
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
+ if (dest_buffer == VK_NULL_HANDLE) {
+ return;
+ }
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -473,6 +479,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
});
} else {
+ if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
+ ReserveNullBuffer();
+ buffer = *null_buffer;
+ offset = 0;
+ }
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffer(index, buffer, offset);
});
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 04a3a861e..2a0f0dbf0 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor;
using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
using Tegra::Texture::TexturePair;
-ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
+ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
+ DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
vk::ShaderModule spv_module_)
- : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
+ : device{device_}, pipeline_cache(pipeline_cache_),
+ update_descriptor_queue{update_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)) {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
@@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
- pipeline = device.GetLogical().CreateComputePipeline({
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .pNext = nullptr,
- .flags = flags,
- .stage{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
- .flags = 0,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = *spv_module,
- .pName = "main",
- .pSpecializationInfo = nullptr,
+ pipeline = device.GetLogical().CreateComputePipeline(
+ {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = flags,
+ .stage{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext =
+ device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *spv_module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
+ .layout = *pipeline_layout,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
},
- .layout = *pipeline_layout,
- .basePipelineHandle = 0,
- .basePipelineIndex = 0,
- });
+ *pipeline_cache);
+
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index d70837fc5..78d77027f 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -28,7 +28,8 @@ class Scheduler;
class ComputePipeline {
public:
- explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
+ explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache,
+ DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* thread_worker,
PipelineStatistics* pipeline_statistics,
@@ -46,6 +47,7 @@ public:
private:
const Device& device;
+ vk::PipelineCache& pipeline_cache;
UpdateDescriptorQueue& update_descriptor_queue;
Shader::Info info;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d11383bf1..f91bb5a1d 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
GraphicsPipeline::GraphicsPipeline(
Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
- VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
+ vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify,
+ const Device& device_, DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
- : key{key_}, device{device_}, texture_cache{texture_cache_},
- buffer_cache{buffer_cache_}, scheduler{scheduler_},
+ : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ pipeline_cache(pipeline_cache_), scheduler{scheduler_},
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
@@ -644,12 +645,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pNext = nullptr,
.flags = 0,
.topology = input_assembly_topology,
- .primitiveRestartEnable = dynamic.primitive_restart_enable != 0 &&
- ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
- device.IsTopologyListPrimitiveRestartSupported()) ||
- SupportsPrimitiveRestart(input_assembly_topology) ||
- (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
- device.IsPatchListPrimitiveRestartSupported())),
+ .primitiveRestartEnable =
+ dynamic.primitive_restart_enable != 0 &&
+ ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
+ device.IsTopologyListPrimitiveRestartSupported()) ||
+ SupportsPrimitiveRestart(input_assembly_topology) ||
+ (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
+ device.IsPatchListPrimitiveRestartSupported()))
+ ? VK_TRUE
+ : VK_FALSE,
};
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
@@ -699,7 +703,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
- .depthBiasEnable = (dynamic.depth_bias_enable == 0 ? VK_TRUE : VK_FALSE),
+ .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? VK_TRUE : VK_FALSE),
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
@@ -894,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
- pipeline = device.GetLogical().CreateGraphicsPipeline({
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .pNext = nullptr,
- .flags = flags,
- .stageCount = static_cast<u32>(shader_stages.size()),
- .pStages = shader_stages.data(),
- .pVertexInputState = &vertex_input_ci,
- .pInputAssemblyState = &input_assembly_ci,
- .pTessellationState = &tessellation_ci,
- .pViewportState = &viewport_ci,
- .pRasterizationState = &rasterization_ci,
- .pMultisampleState = &multisample_ci,
- .pDepthStencilState = &depth_stencil_ci,
- .pColorBlendState = &color_blend_ci,
- .pDynamicState = &dynamic_state_ci,
- .layout = *pipeline_layout,
- .renderPass = render_pass,
- .subpass = 0,
- .basePipelineHandle = nullptr,
- .basePipelineIndex = 0,
- });
+ pipeline = device.GetLogical().CreateGraphicsPipeline(
+ {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = flags,
+ .stageCount = static_cast<u32>(shader_stages.size()),
+ .pStages = shader_stages.data(),
+ .pVertexInputState = &vertex_input_ci,
+ .pInputAssemblyState = &input_assembly_ci,
+ .pTessellationState = &tessellation_ci,
+ .pViewportState = &viewport_ci,
+ .pRasterizationState = &rasterization_ci,
+ .pMultisampleState = &multisample_ci,
+ .pDepthStencilState = &depth_stencil_ci,
+ .pColorBlendState = &color_blend_ci,
+ .pDynamicState = &dynamic_state_ci,
+ .layout = *pipeline_layout,
+ .renderPass = render_pass,
+ .subpass = 0,
+ .basePipelineHandle = nullptr,
+ .basePipelineIndex = 0,
+ },
+ *pipeline_cache);
}
void GraphicsPipeline::Validate() {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 1ed2967be..67c657d0e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -70,16 +70,14 @@ class GraphicsPipeline {
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
public:
- explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache,
- TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify,
- const Device& device, DescriptorPool& descriptor_pool,
- UpdateDescriptorQueue& update_descriptor_queue,
- Common::ThreadWorker* worker_thread,
- PipelineStatistics* pipeline_statistics,
- RenderPassCache& render_pass_cache,
- const GraphicsPipelineCacheKey& key,
- std::array<vk::ShaderModule, NUM_STAGES> stages,
- const std::array<const Shader::Info*, NUM_STAGES>& infos);
+ explicit GraphicsPipeline(
+ Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
+ vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify,
+ const Device& device, DescriptorPool& descriptor_pool,
+ UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread,
+ PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
+ const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos);
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -133,6 +131,7 @@ private:
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
+ vk::PipelineCache& pipeline_cache;
Scheduler& scheduler;
UpdateDescriptorQueue& update_descriptor_queue;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3046b72ab..67e5bc648 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
constexpr u32 CACHE_VERSION = 10;
+constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
template <typename Container>
auto MakeSpan(Container& container) {
@@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
+ use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
@@ -362,7 +364,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
};
}
-PipelineCache::~PipelineCache() = default;
+PipelineCache::~PipelineCache() {
+ if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
+ SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
+ CACHE_VERSION);
+ }
+}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
@@ -418,6 +425,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
}
pipeline_cache_filename = base_dir / "vulkan.bin";
+ if (use_vulkan_pipeline_cache) {
+ vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
+ vulkan_pipeline_cache =
+ LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
+ }
+
struct {
std::mutex mutex;
size_t total{};
@@ -496,6 +509,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
workers.WaitForRequests(stop_loading);
+ if (use_vulkan_pipeline_cache) {
+ SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
+ CACHE_VERSION);
+ }
+
if (state.statistics) {
state.statistics->Report();
}
@@ -616,10 +634,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
previous_stage = &program;
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
- return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache,
- &shader_notify, device, descriptor_pool,
- update_descriptor_queue, thread_worker, statistics,
- render_pass_cache, key, std::move(modules), infos);
+ return std::make_unique<GraphicsPipeline>(
+ scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
+ descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
+ std::move(modules), infos);
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
@@ -689,13 +707,107 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
spv_module.SetObjectNameEXT(name.c_str());
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
- return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
- thread_worker, statistics, &shader_notify,
- program.info, std::move(spv_module));
+ return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool,
+ update_descriptor_queue, thread_worker, statistics,
+ &shader_notify, program.info, std::move(spv_module));
} catch (const Shader::Exception& exception) {
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
}
+void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
+ const vk::PipelineCache& pipeline_cache,
+ u32 cache_version) try {
+ std::ofstream file(filename, std::ios::binary);
+ file.exceptions(std::ifstream::failbit);
+ if (!file.is_open()) {
+ LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ return;
+ }
+ file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size())
+ .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
+
+ size_t cache_size = 0;
+ std::vector<char> cache_data;
+ if (pipeline_cache) {
+ pipeline_cache.Read(&cache_size, nullptr);
+ cache_data.resize(cache_size);
+ pipeline_cache.Read(&cache_size, cache_data.data());
+ }
+ file.write(cache_data.data(), cache_size);
+
+ LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}",
+ Common::FS::PathToUTF8String(filename));
+
+} catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+}
+
+vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename,
+ u32 expected_cache_version) {
+ const auto create_pipeline_cache = [this](size_t data_size, const void* data) {
+ VkPipelineCacheCreateInfo pipeline_cache_ci = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .initialDataSize = data_size,
+ .pInitialData = data};
+ return device.GetLogical().CreatePipelineCache(pipeline_cache_ci);
+ };
+ try {
+ std::ifstream file(filename, std::ios::binary | std::ios::ate);
+ if (!file.is_open()) {
+ return create_pipeline_cache(0, nullptr);
+ }
+ file.exceptions(std::ifstream::failbit);
+ const auto end{file.tellg()};
+ file.seekg(0, std::ios::beg);
+
+ std::array<char, 8> magic_number;
+ u32 cache_version;
+ file.read(magic_number.data(), magic_number.size())
+ .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
+ if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) {
+ file.close();
+ if (Common::FS::RemoveFile(filename)) {
+ if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) {
+ LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file");
+ }
+ if (cache_version != expected_cache_version) {
+ LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache");
+ }
+ } else {
+ LOG_ERROR(Common_Filesystem,
+ "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"",
+ Common::FS::PathToUTF8String(filename));
+ }
+ return create_pipeline_cache(0, nullptr);
+ }
+
+ const size_t cache_size = static_cast<size_t>(end) - magic_number.size();
+ std::vector<char> cache_data(cache_size);
+ file.read(cache_data.data(), cache_size);
+
+ LOG_INFO(Render_Vulkan,
+ "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename));
+
+ return create_pipeline_cache(cache_size, cache_data.data());
+
+ } catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+
+ return create_pipeline_cache(0, nullptr);
+ }
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index b4f593ef5..5171912d7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -135,6 +135,12 @@ private:
PipelineStatistics* statistics,
bool build_in_parallel);
+ void SerializeVulkanPipelineCache(const std::filesystem::path& filename,
+ const vk::PipelineCache& pipeline_cache, u32 cache_version);
+
+ vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
+ u32 expected_cache_version);
+
const Device& device;
Scheduler& scheduler;
DescriptorPool& descriptor_pool;
@@ -144,6 +150,7 @@ private:
TextureCache& texture_cache;
VideoCore::ShaderNotify& shader_notify;
bool use_asynchronous_shaders{};
+ bool use_vulkan_pipeline_cache{};
GraphicsPipelineCacheKey graphics_key{};
GraphicsPipeline* current_pipeline{};
@@ -158,6 +165,9 @@ private:
std::filesystem::path pipeline_cache_filename;
+ std::filesystem::path vulkan_pipeline_cache_filename;
+ vk::PipelineCache vulkan_pipeline_cache;
+
Common::ThreadWorker workers;
Common::ThreadWorker serialization_thread;
DynamicFeatures dynamic_features;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 242bf9602..ed4a72166 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
+ gpu_memory->FlushCaching();
query_cache.UpdateCounters();
@@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
void RasterizerVulkan::DispatchCompute() {
FlushWork();
+ gpu_memory->FlushCaching();
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
if (!pipeline) {
@@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
}
}
+void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ for (const auto& [addr, size] : sequences) {
+ texture_cache.WriteMemory(addr, size);
+ }
+ }
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ for (const auto& [addr, size] : sequences) {
+ buffer_cache.WriteMemory(addr, size);
+ }
+ }
+ {
+ for (const auto& [addr, size] : sequences) {
+ query_cache.InvalidateRegion(addr, size);
+ pipeline_cache.InvalidateRegion(addr, size);
+ }
+ }
+}
+
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c661e5b19..472cc64d9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -79,6 +79,7 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+ void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3d1109b30..77aee802d 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1487,6 +1487,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
max_push_descriptors = push_descriptor.maxPushDescriptors;
}
+
+ has_null_descriptor = true;
+
return extensions;
}
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 920a8f4e3..6042046e1 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -397,6 +397,10 @@ public:
return must_emulate_bgr565;
}
+ bool HasNullDescriptor() const {
+ return has_null_descriptor;
+ }
+
u32 GetMaxVertexInputAttributes() const {
return max_vertex_input_attributes;
}
@@ -511,6 +515,7 @@ private:
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
+ bool has_null_descriptor{}; ///< Has support for null descriptors.
u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline
u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 562039b56..b6d83e446 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -32,7 +32,7 @@
namespace Vulkan {
namespace {
[[nodiscard]] std::vector<const char*> RequiredExtensions(
- Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) {
+ Core::Frontend::WindowSystemType window_type, bool enable_validation) {
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
@@ -65,7 +65,7 @@ namespace {
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
- if (enable_debug_utils) {
+ if (enable_validation) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -95,9 +95,9 @@ namespace {
return true;
}
-[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
+[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
std::vector<const char*> layers;
- if (enable_layers) {
+ if (enable_validation) {
layers.push_back("VK_LAYER_KHRONOS_validation");
}
return layers;
@@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const
vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
u32 required_version, Core::Frontend::WindowSystemType window_type,
- bool enable_debug_utils, bool enable_layers) {
+ bool enable_validation) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
@@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
- const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
+ const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
if (!AreExtensionsSupported(dld, extensions)) {
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- std::vector<const char*> layers = Layers(enable_layers);
+ std::vector<const char*> layers = Layers(enable_validation);
RemoveUnavailableLayers(dld, layers);
const u32 available_version = vk::AvailableVersion(dld);
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
index 40419d802..b59b92f83 100644
--- a/src/video_core/vulkan_common/vulkan_instance.h
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -17,8 +17,7 @@ namespace Vulkan {
* @param dld Dispatch table to load function pointers into
* @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
* @param window_type Window system type's enabled extension
- * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not
- * @param enable_layers Whether to enable Vulkan validation layers or not
+ * @param enable_validation Whether to enable Vulkan validation layers or not
*
* @return A new Vulkan instance
* @throw vk::Exception on failure
@@ -26,6 +25,6 @@ namespace Vulkan {
[[nodiscard]] vk::Instance CreateInstance(
const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
- bool enable_debug_utils = false, bool enable_layers = false);
+ bool enable_validation = false);
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 861767c13..61be1fce1 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCreateGraphicsPipelines);
X(vkCreateImage);
X(vkCreateImageView);
+ X(vkCreatePipelineCache);
X(vkCreatePipelineLayout);
X(vkCreateQueryPool);
X(vkCreateRenderPass);
@@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkDestroyImage);
X(vkDestroyImageView);
X(vkDestroyPipeline);
+ X(vkDestroyPipelineCache);
X(vkDestroyPipelineLayout);
X(vkDestroyQueryPool);
X(vkDestroyRenderPass);
@@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetEventStatus);
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
+ X(vkGetPipelineCacheData);
X(vkGetMemoryFdKHR);
#ifdef _WIN32
X(vkGetMemoryWin32HandleKHR);
@@ -431,6 +434,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex
dld.vkDestroyPipeline(device, handle, nullptr);
}
+void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept {
+ dld.vkDestroyPipelineCache(device, handle, nullptr);
+}
+
void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyPipelineLayout(device, handle, nullptr);
}
@@ -651,6 +658,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
}
+void PipelineCache::SetObjectNameEXT(const char* name) const {
+ SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name);
+}
+
void Semaphore::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
}
@@ -746,21 +757,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout(
return DescriptorSetLayout(object, handle, *dld);
}
+PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const {
+ VkPipelineCache cache;
+ Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache));
+ return PipelineCache(cache, handle, *dld);
+}
+
PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
VkPipelineLayout object;
Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
return PipelineLayout(object, handle, *dld);
}
-Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
+Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
+ VkPipelineCache cache) const {
VkPipeline object;
- Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object));
+ Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object));
return Pipeline(object, handle, *dld);
}
-Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
+Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
+ VkPipelineCache cache) const {
VkPipeline object;
- Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object));
+ Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object));
return Pipeline(object, handle, *dld);
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index accfad8c1..412779b51 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{};
PFN_vkCreateImage vkCreateImage{};
PFN_vkCreateImageView vkCreateImageView{};
+ PFN_vkCreatePipelineCache vkCreatePipelineCache{};
PFN_vkCreatePipelineLayout vkCreatePipelineLayout{};
PFN_vkCreateQueryPool vkCreateQueryPool{};
PFN_vkCreateRenderPass vkCreateRenderPass{};
@@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkDestroyImage vkDestroyImage{};
PFN_vkDestroyImageView vkDestroyImageView{};
PFN_vkDestroyPipeline vkDestroyPipeline{};
+ PFN_vkDestroyPipelineCache vkDestroyPipelineCache{};
PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{};
PFN_vkDestroyQueryPool vkDestroyQueryPool{};
PFN_vkDestroyRenderPass vkDestroyRenderPass{};
@@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkGetEventStatus vkGetEventStatus{};
PFN_vkGetFenceStatus vkGetFenceStatus{};
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+ PFN_vkGetPipelineCacheData vkGetPipelineCacheData{};
PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
#ifdef _WIN32
PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
@@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept;
@@ -773,6 +777,18 @@ public:
void SetObjectNameEXT(const char* name) const;
};
+class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> {
+ using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Set object name.
+ void SetObjectNameEXT(const char* name) const;
+
+ VkResult Read(size_t* size, void* data) const noexcept {
+ return dld->vkGetPipelineCacheData(owner, handle, size, data);
+ }
+};
+
class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
@@ -844,11 +860,15 @@ public:
DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const;
+ PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const;
+
PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const;
- Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const;
+ Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
+ VkPipelineCache cache = nullptr) const;
- Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const;
+ Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
+ VkPipelineCache cache = nullptr) const;
Sampler CreateSampler(const VkSamplerCreateInfo& ci) const;