diff options
Diffstat (limited to '')
44 files changed, 850 insertions, 176 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 149e621f9..1638b79f5 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) { // Renderer values.fsr_sharpening_slider.SetGlobal(true); values.renderer_backend.SetGlobal(true); + values.renderer_force_max_clock.SetGlobal(true); values.vulkan_device.SetGlobal(true); values.aspect_ratio.SetGlobal(true); values.max_anisotropy.SetGlobal(true); @@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_pessimistic_flushes.SetGlobal(true); + values.use_vulkan_driver_pipeline_cache.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 5017951c5..9eb3711ca 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -415,6 +415,7 @@ struct Values { // Renderer SwitchableSetting<RendererBackend, true> renderer_backend{ RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; + SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"}; Setting<bool> renderer_debug{false, "debug"}; Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; @@ -451,6 +452,8 @@ struct Values { SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"}; + SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, + "use_vulkan_driver_pipeline_cache"}; SwitchableSetting<u8> bg_red{0, "bg_red"}; SwitchableSetting<u8> bg_green{0, "bg_green"}; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 26be74df4..a1e41faff 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -436,7 +436,7 @@ struct Memory::Impl { } if (Settings::IsFastmemEnabled()) { - const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; + const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); } diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index f7236afab..5cd0628f2 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp @@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") { int num = 0; buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); - REQUIRE(num == 0); + REQUIRE(num == 1); REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); buffer.FlushCachedWrites(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index aa271a377..f617665de 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -85,6 +85,7 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h + invalidation_accumulator.h memory_manager.cpp memory_manager.h precompiled_headers.h @@ -190,6 +191,8 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_texture_cache_base.cpp + renderer_vulkan/vk_turbo_mode.cpp + renderer_vulkan/vk_turbo_mode.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 92d77eef2..c47b7d866 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -430,7 +430,7 @@ private: if (query_begin >= SizeBytes() || size < 0) { return; } - u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>(); u64* const state_words = Array<type>(); const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; @@ -483,7 +483,7 @@ private: NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); } // Exclude CPU modified pages when visiting GPU pages - const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); + const u64 word = current_word; u64 page = page_begin; page_begin = 0; @@ -531,7 +531,7 @@ private: [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -539,8 +539,7 @@ private: const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } @@ -564,7 +563,7 @@ private: [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array<Type::Untracked>(); + [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -574,8 +573,7 @@ private: u64 begin = std::numeric_limits<u64>::max(); u64 end = 0; for (u64 word_index = word_begin; word_index < word_end; ++word_index) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index cea1dd8b0..7f5a0c29d 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, x_elements, regs.line_count, regs.dest.BlockHeight(), regs.dest.BlockDepth(), regs.line_length_in); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index e655e7254..a126c359c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/sw_blitter/blitter.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" @@ -20,8 +21,8 @@ namespace Tegra::Engines { using namespace Texture; -Fermi2D::Fermi2D(MemoryManager& memory_manager_) { - sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); +Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} { + sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager); // Nvidia's OpenGL driver seems to assume these values regs.src.depth = 1; regs.dst.depth = 1; @@ -104,6 +105,7 @@ void Fermi2D::Blit() { config.src_x0 = 0; } + memory_manager.FlushCaching(); if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { sw_blitter->Blit(src, regs.dst, config); } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 523fbdec2..705b323e1 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -305,6 +305,7 @@ public: private: VideoCore::RasterizerInterface* rasterizer = nullptr; std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; + MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fbfd1ddd2..97f547789 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { } void Maxwell3D::ProcessQueryGet() { - // TODO(Subv): Support the other query units. - if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { - LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); - } - switch (regs.report_semaphore.query.operation) { case Regs::ReportSemaphore::Operation::Release: if (regs.report_semaphore.query.short_query != 0) { @@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { const GPUVAddr address{buffer_address + regs.const_buffer.offset}; const size_t copy_size = amount * sizeof(u32); - memory_manager.WriteBlock(address, start_base, copy_size); + memory_manager.WriteBlockCached(address, start_base, copy_size); // Increment the current buffer position. regs.const_buffer.offset += static_cast<u32>(copy_size); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01f70ea9e..7762c7d96 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -69,7 +69,7 @@ void MaxwellDMA::Launch() { if (launch.multi_line_enable) { const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; - + memory_manager.FlushCaching(); if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. CopyBlockLinearToBlockLinear(); @@ -104,6 +104,7 @@ void MaxwellDMA::Launch() { reinterpret_cast<u8*>(tmp_buffer.data()), regs.line_length_in * sizeof(u32)); } else { + memory_manager.FlushCaching(); const auto convert_linear_2_blocklinear_addr = [](u64 address) { return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | ((address & 0x180) >> 1) | ((address & 0x20) << 3); @@ -121,8 +122,8 @@ void MaxwellDMA::Launch() { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), + tmp_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -132,7 +133,7 @@ void MaxwellDMA::Launch() { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock( + memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), tmp_buffer.data(), tmp_buffer.size()); } @@ -141,8 +142,8 @@ void MaxwellDMA::Launch() { std::vector<u8> tmp_buffer(regs.line_length_in); memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), - regs.line_length_in); + memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + regs.line_length_in); } } } @@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyPitchToBlockLinear() { @@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_in); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::FastCopyBlockLinearToPitch() { @@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { regs.src_params.block_size.height, regs.src_params.block_size.depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyBlockLinearToBlockLinear() { @@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.block_size.height, dst.block_size.depth, pitch); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::ReleaseSemaphore() { diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e6dc24f22..f275b2aa9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -47,6 +47,7 @@ set(SHADER_FILES vulkan_present_scaleforce_fp16.frag vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp + vulkan_turbo_mode.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp new file mode 100644 index 000000000..d651001d9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in; + +layout (binding = 0) buffer ThreadData { + uint data[]; +}; + +uint xorshift32(uint x) { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return x; +} + +uint getGlobalIndex() { + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y; +} + +void main() { + uint myIndex = xorshift32(getGlobalIndex()); + uint otherIndex = xorshift32(myIndex); + + uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1; + atomicAdd(data[myIndex % data.length()], otherValue); +} diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h new file mode 100644 index 000000000..2c2aaf7bb --- /dev/null +++ b/src/video_core/invalidation_accumulator.h @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <utility> +#include <vector> + +#include "common/common_types.h" + +namespace VideoCommon { + +class InvalidationAccumulator { +public: + InvalidationAccumulator() = default; + ~InvalidationAccumulator() = default; + + void Add(GPUVAddr address, size_t size) { + const auto reset_values = [&]() { + if (has_collected) { + buffer.emplace_back(start_address, accumulated_size); + } + start_address = address; + accumulated_size = size; + last_collection = start_address + size; + }; + if (address >= start_address && address + size <= last_collection) [[likely]] { + return; + } + size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; + address = address & atomicity_mask; + if (!has_collected) [[unlikely]] { + reset_values(); + has_collected = true; + return; + } + if (address != last_collection) [[unlikely]] { + reset_values(); + return; + } + accumulated_size += size; + last_collection += size; + } + + void Clear() { + buffer.clear(); + start_address = 0; + last_collection = 0; + has_collected = false; + } + + bool AnyAccumulated() const { + return has_collected; + } + + template <typename Func> + void Callback(Func&& func) { + if (!has_collected) { + return; + } + buffer.emplace_back(start_address, accumulated_size); + for (auto& [address, size] : buffer) { + func(address, size); + } + } + +private: + static constexpr size_t atomicity_bits = 5; + static constexpr size_t atomicity_size = 1ULL << atomicity_bits; + static constexpr size_t atomicity_size_mask = atomicity_size - 1; + static constexpr size_t atomicity_mask = ~atomicity_size_mask; + GPUVAddr start_address{}; + GPUVAddr last_collection{}; + size_t accumulated_size{}; + bool has_collected{}; + std::vector<std::pair<VAddr, size_t>> buffer; +}; + +} // namespace VideoCommon diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index a5476e795..6272a4652 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -50,38 +50,6 @@ protected: Maxwell3D& maxwell3d; }; -class HLE_DrawArrays final : public HLEMacroImpl { -public: - explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); - maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2], - maxwell3d.regs.global_base_instance_index, 1); - } -}; - -class HLE_DrawIndexed final : public HLEMacroImpl { -public: - explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - maxwell3d.regs.index_buffer.start_addr_high = parameters[1]; - maxwell3d.regs.index_buffer.start_addr_low = parameters[2]; - maxwell3d.regs.index_buffer.format = - static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - - auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); - maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4], - maxwell3d.regs.global_base_vertex_index, - maxwell3d.regs.global_base_instance_index, 1); - } -}; - /* * @note: these macros have two versions, a normal and extended version, with the extended version * also assigning the base vertex/instance. @@ -497,11 +465,6 @@ public: } // Anonymous namespace HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { - builders.emplace(0xDD6A7FA92A7D2674ULL, - std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { - return std::make_unique<HLE_DrawArrays>(maxwell3d__); - })); builders.emplace(0x0D61FC9FAAC9FCADULL, std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { @@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); })); - builders.emplace(0x2DB33AADB741839CULL, - std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { - return std::make_unique<HLE_DrawIndexed>(maxwell3d__); - })); builders.emplace(0x771BB18C62444DA0ULL, std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3a5cdeb39..3bcae3503 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,11 +6,13 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" #include "core/memory.h" +#include "video_core/invalidation_accumulator.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits != big_page_bits ? page_bits : 0}, kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( - 1, std::memory_order_acq_rel)} { + 1, std::memory_order_acq_rel)}, + accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { address_space_size = 1ULL << address_space_bits; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; @@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); + if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { + fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); + } else { + fastmem_arena = nullptr; + } } MemoryManager::~MemoryManager() = default; @@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (size == 0) { return; } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); - - for (const auto& [map_addr, map_size] : submapped_ranges) { - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); - ASSERT(cpu_addr); + GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); - rasterizer->UnmapMemory(*cpu_addr, map_size); + for (const auto& [map_addr, map_size] : page_stash) { + rasterizer->UnmapMemory(map_addr, map_size); } + page_stash.clear(); BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); @@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } } -template <bool is_safe> +template <bool is_safe, bool use_fastmem> void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, @@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if (!IsBigPageContinous(page_index)) [[unlikely]] { - memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + if constexpr (use_fastmem) { + std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); + if (!IsBigPageContinous(page_index)) [[unlikely]] { + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); + } else { + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); + } } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; @@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const { - ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); + if (fastmem_arena) [[likely]] { + ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); + return; + } + ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + if (fastmem_arena) [[likely]] { + ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + return; + } + ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template <bool is_safe> @@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } +void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, + std::size_t size) { + WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); + accumulator->Add(gpu_dest_addr, size); +} + void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, @@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( GPUVAddr gpu_addr, std::size_t size) const { std::vector<std::pair<GPUVAddr, std::size_t>> result{}; - std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; + GetSubmappedRangeImpl<true>(gpu_addr, size, result); + return result; +} + +template <bool is_gpu_address> +void MemoryManager::GetSubmappedRangeImpl( + GPUVAddr gpu_addr, std::size_t size, + std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + result) const { + std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> + last_segment{}; std::optional<VAddr> old_page_addr{}; const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( } old_page_addr = {cpu_addr_base + copy_amount}; if (!last_segment) { - const GPUVAddr new_base_addr = (page_index << page_bits) + offset; - last_segment = {new_base_addr, copy_amount}; + if constexpr (is_gpu_address) { + const GPUVAddr new_base_addr = (page_index << page_bits) + offset; + last_segment = {new_base_addr, copy_amount}; + } else { + last_segment = {cpu_addr_base, copy_amount}; + } } else { last_segment->second += copy_amount; } @@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( }; MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); split(0, 0, 0); - return result; +} + +void MemoryManager::FlushCaching() { + if (!accumulator->AnyAccumulated()) { + return; + } + accumulator->Callback([this](GPUVAddr addr, size_t size) { + GetSubmappedRangeImpl<false>(addr, size, page_stash); + }); + rasterizer->InnerInvalidation(page_stash); + page_stash.clear(); + accumulator->Clear(); } } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 828e13439..2936364f0 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -19,6 +19,10 @@ namespace VideoCore { class RasterizerInterface; } +namespace VideoCommon { +class InvalidationAccumulator; +} + namespace Core { class DeviceMemory; namespace Memory { @@ -80,6 +84,7 @@ public: */ void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** * Checks if a gpu region can be simply read with a pointer. @@ -129,12 +134,14 @@ public: size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size = std::numeric_limits<size_t>::max()) const; + void FlushCaching(); + private: template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; - template <bool is_safe> + template <bool is_safe, bool use_fastmem> void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const; @@ -154,6 +161,12 @@ private: inline bool IsBigPageContinous(size_t big_page_index) const; inline void SetBigPageContinous(size_t big_page_index, bool value); + template <bool is_gpu_address> + void GetSubmappedRangeImpl( + GPUVAddr gpu_addr, std::size_t size, + std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + result) const; + Core::System& system; Core::Memory::Memory& memory; Core::DeviceMemory& device_memory; @@ -201,10 +214,13 @@ private: Common::VirtualBuffer<u32> big_page_table_cpu; std::vector<u64> big_page_continous; + std::vector<std::pair<VAddr, std::size_t>> page_stash{}; + u8* fastmem_arena{}; constexpr static size_t continous_bits = 64; const size_t unique_identifier; + std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; static std::atomic<size_t> unique_identifier_generator; }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f44c7df50..1735b6164 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include <functional> #include <optional> #include <span> +#include <utility> #include "common/common_types.h" #include "common/polyfill_thread.h" #include "video_core/cache_types.h" @@ -95,6 +96,12 @@ public: virtual void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; + virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { + for (const auto& [cpu_addr, size] : sequences) { + InvalidateRegion(cpu_addr, size); + } + } + /// Notify rasterizer that any caches of the specified region are desync with guest virtual void OnCPUWrite(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f502a7d09..1578cb206 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -78,6 +78,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext return separated_extensions; } +} // Anonymous namespace + Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, VkSurfaceKHR surface) { const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); @@ -89,7 +91,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl const vk::PhysicalDevice physical_device(devices[device_index], dld); return Device(*instance, physical_device, surface, dld); } -} // Anonymous namespace RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, @@ -98,7 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, - true, Settings::values.renderer_debug.GetValue())), + Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), @@ -109,6 +110,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, screen_info), rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, state_tracker, scheduler) { + if (Settings::values.renderer_force_max_clock.GetValue()) { + turbo_mode.emplace(instance, dld); + } Report(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index e7bfecb20..009e75e0d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -31,6 +32,9 @@ class GPU; namespace Vulkan { +Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, + VkSurfaceKHR surface); + class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, @@ -74,6 +78,7 @@ private: Swapchain swapchain; BlitScreen blit_screen; RasterizerVulkan rasterizer; + std::optional<TurboMode> turbo_mode; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 487d8b416..b0153a502 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -336,6 +336,9 @@ void BufferCacheRuntime::Finish() { void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { + return; + } static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -394,6 +397,9 @@ void BufferCacheRuntime::PostCopyBarrier() { } void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { + if (dest_buffer == VK_NULL_HANDLE) { + return; + } static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -473,6 +479,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); }); } else { + if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { + ReserveNullBuffer(); + buffer = *null_buffer; + offset = 0; + } scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffer(index, buffer, offset); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 04a3a861e..2a0f0dbf0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor; using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; -ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_, + DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) - : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, pipeline_cache(pipeline_cache_), + update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, + pipeline = device.GetLogical().CreateComputePipeline( + { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = + device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + *pipeline_cache); + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index d70837fc5..78d77027f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -28,7 +28,8 @@ class Scheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache, + DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, PipelineStatistics* pipeline_statistics, @@ -46,6 +47,7 @@ public: private: const Device& device; + vk::PipelineCache& pipeline_cache; UpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 734c379b9..f91bb5a1d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m GraphicsPipeline::GraphicsPipeline( Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify, + const Device& device_, DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, const std::array<const Shader::Info*, NUM_STAGES>& infos) - : key{key_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + pipeline_cache(pipeline_cache_), scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -897,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stageCount = static_cast<u32>(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = render_pass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); + pipeline = device.GetLogical().CreateGraphicsPipeline( + { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }, + *pipeline_cache); } void GraphicsPipeline::Validate() { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1ed2967be..67c657d0e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -70,16 +70,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, - TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, - const Device& device, DescriptorPool& descriptor_pool, - UpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - PipelineStatistics* pipeline_statistics, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array<vk::ShaderModule, NUM_STAGES> stages, - const std::array<const Shader::Info*, NUM_STAGES>& infos); + explicit GraphicsPipeline( + Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify, + const Device& device, DescriptorPool& descriptor_pool, + UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, + PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -133,6 +131,7 @@ private: const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; + vk::PipelineCache& pipeline_cache; Scheduler& scheduler; UpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3046b72ab..67e5bc648 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; constexpr u32 CACHE_VERSION = 10; +constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; template <typename Container> auto MakeSpan(Container& container) { @@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -362,7 +364,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device }; } -PipelineCache::~PipelineCache() = default; +PipelineCache::~PipelineCache() { + if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) { + SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, + CACHE_VERSION); + } +} GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -418,6 +425,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = base_dir / "vulkan.bin"; + if (use_vulkan_pipeline_cache) { + vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin"; + vulkan_pipeline_cache = + LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION); + } + struct { std::mutex mutex; size_t total{}; @@ -496,6 +509,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(stop_loading); + if (use_vulkan_pipeline_cache) { + SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, + CACHE_VERSION); + } + if (state.statistics) { state.statistics->Report(); } @@ -616,10 +634,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, - &shader_notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, statistics, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique<GraphicsPipeline>( + scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -689,13 +707,107 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, - thread_worker, statistics, &shader_notify, - program.info, std::move(spv_module)); + return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool, + update_descriptor_queue, thread_worker, statistics, + &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } +void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename, + const vk::PipelineCache& pipeline_cache, + u32 cache_version) try { + std::ofstream file(filename, std::ios::binary); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size()) + .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); + + size_t cache_size = 0; + std::vector<char> cache_data; + if (pipeline_cache) { + pipeline_cache.Read(&cache_size, nullptr); + cache_data.resize(cache_size); + pipeline_cache.Read(&cache_size, cache_data.data()); + } + file.write(cache_data.data(), cache_size); + + LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}", + Common::FS::PathToUTF8String(filename)); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename, + u32 expected_cache_version) { + const auto create_pipeline_cache = [this](size_t data_size, const void* data) { + VkPipelineCacheCreateInfo pipeline_cache_ci = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .initialDataSize = data_size, + .pInitialData = data}; + return device.GetLogical().CreatePipelineCache(pipeline_cache_ci); + }; + try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return create_pipeline_cache(0, nullptr); + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array<char, 8> magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); + if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file"); + } + if (cache_version != expected_cache_version) { + LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return create_pipeline_cache(0, nullptr); + } + + const size_t cache_size = static_cast<size_t>(end) - magic_number.size(); + std::vector<char> cache_data(cache_size); + file.read(cache_data.data(), cache_size); + + LOG_INFO(Render_Vulkan, + "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename)); + + return create_pipeline_cache(cache_size, cache_data.data()); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } + + return create_pipeline_cache(0, nullptr); + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b4f593ef5..5171912d7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -135,6 +135,12 @@ private: PipelineStatistics* statistics, bool build_in_parallel); + void SerializeVulkanPipelineCache(const std::filesystem::path& filename, + const vk::PipelineCache& pipeline_cache, u32 cache_version); + + vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename, + u32 expected_cache_version); + const Device& device; Scheduler& scheduler; DescriptorPool& descriptor_pool; @@ -144,6 +150,7 @@ private: TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; bool use_asynchronous_shaders{}; + bool use_vulkan_pipeline_cache{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; @@ -158,6 +165,9 @@ private: std::filesystem::path pipeline_cache_filename; + std::filesystem::path vulkan_pipeline_cache_filename; + vk::PipelineCache vulkan_pipeline_cache; + Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; DynamicFeatures dynamic_features; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 242bf9602..ed4a72166 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { void RasterizerVulkan::DispatchCompute() { FlushWork(); + gpu_memory->FlushCaching(); ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { @@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache } } +void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { + { + std::scoped_lock lock{texture_cache.mutex}; + for (const auto& [addr, size] : sequences) { + texture_cache.WriteMemory(addr, size); + } + } + { + std::scoped_lock lock{buffer_cache.mutex}; + for (const auto& [addr, size] : sequences) { + buffer_cache.WriteMemory(addr, size); + } + } + { + for (const auto& [addr, size] : sequences) { + query_cache.InvalidateRegion(addr, size); + pipeline_cache.InvalidateRegion(addr, size); + } + } +} + void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c661e5b19..472cc64d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -79,6 +79,7 @@ public: VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp new file mode 100644 index 000000000..852b86f84 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -0,0 +1,205 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/literals.h" +#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_turbo_mode.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +using namespace Common::Literals; + +TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); +} + +TurboMode::~TurboMode() = default; + +void TurboMode::Run(std::stop_token stop_token) { + auto& dld = m_device.GetLogical(); + + // Allocate buffer. 2MiB should be sufficient. + auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 2_MiB, + .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + + // Commit some device local memory for the buffer. + auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + + // Create the descriptor pool to contain our descriptor. + constexpr VkDescriptorPoolSize pool_size{ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + }; + + auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = &pool_size, + }); + + // Create the descriptor set layout from the pool. + constexpr VkDescriptorSetLayoutBinding layout_binding{ + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; + + auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &layout_binding, + }); + + // Actually create the descriptor set. + auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + }); + + // Create the shader. + auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV); + + // Create the pipeline layout. + auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + + // Actually create the pipeline. + const VkPipelineShaderStageCreateInfo shader_stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + + // Create a fence to wait on. + auto fence = dld.CreateFence(VkFenceCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }); + + // Create a command pool to allocate a command buffer from. + auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = m_device.GetGraphicsFamily(), + }); + + // Create a single command buffer. + auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); + auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; + + while (!stop_token.stop_requested()) { + // Reset the fence. + fence.Reset(); + + // Update descriptor set. + const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = 0, + .range = VK_WHOLE_SIZE, + }; + + const VkWriteDescriptorSet buffer_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set[0], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + dld.UpdateDescriptorSets(std::array{buffer_write}, {}); + + // Set up the command buffer. + cmdbuf.Begin(VkCommandBufferBeginInfo{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); + + // Clear the buffer. + cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0); + + // Bind descriptor set. + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, {}); + + // Bind the pipeline. + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + + // Dispatch. + cmdbuf.Dispatch(64, 64, 1); + + // Finish. + cmdbuf.End(); + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = 0, + .pSignalSemaphores = nullptr, + }; + + m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence); + + // Wait for completion. + fence.Wait(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h new file mode 100644 index 000000000..2060e2395 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/polyfill_thread.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class TurboMode { +public: + explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); + ~TurboMode(); + +private: + void Run(std::stop_token stop_token); + + Device m_device; + MemoryAllocator m_allocator; + std::jthread m_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5c5bfa18d..77aee802d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { is_patch_list_restart_supported = primitive_topology_list_restart.primitiveTopologyPatchListRestart; } - if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { + if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) { extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); khr_swapchain_mutable_format = true; @@ -1487,6 +1487,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { max_push_descriptors = push_descriptor.maxPushDescriptors; } + + has_null_descriptor = true; + return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 920a8f4e3..6042046e1 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -397,6 +397,10 @@ public: return must_emulate_bgr565; } + bool HasNullDescriptor() const { + return has_null_descriptor; + } + u32 GetMaxVertexInputAttributes() const { return max_vertex_input_attributes; } @@ -511,6 +515,7 @@ private: bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool has_null_descriptor{}; ///< Has support for null descriptors. u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 562039b56..b6d83e446 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -32,7 +32,7 @@ namespace Vulkan { namespace { [[nodiscard]] std::vector<const char*> RequiredExtensions( - Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { + Core::Frontend::WindowSystemType window_type, bool enable_validation) { std::vector<const char*> extensions; extensions.reserve(6); switch (window_type) { @@ -65,7 +65,7 @@ namespace { if (window_type != Core::Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - if (enable_debug_utils) { + if (enable_validation) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -95,9 +95,9 @@ namespace { return true; } -[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { +[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { std::vector<const char*> layers; - if (enable_layers) { + if (enable_validation) { layers.push_back("VK_LAYER_KHRONOS_validation"); } return layers; @@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, Core::Frontend::WindowSystemType window_type, - bool enable_debug_utils, bool enable_layers) { + bool enable_validation) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); @@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); } - const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); + const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation); if (!AreExtensionsSupported(dld, extensions)) { throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } - std::vector<const char*> layers = Layers(enable_layers); + std::vector<const char*> layers = Layers(enable_validation); RemoveUnavailableLayers(dld, layers); const u32 available_version = vk::AvailableVersion(dld); diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h index 40419d802..b59b92f83 100644 --- a/src/video_core/vulkan_common/vulkan_instance.h +++ b/src/video_core/vulkan_common/vulkan_instance.h @@ -17,8 +17,7 @@ namespace Vulkan { * @param dld Dispatch table to load function pointers into * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) * @param window_type Window system type's enabled extension - * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not - * @param enable_layers Whether to enable Vulkan validation layers or not + * @param enable_validation Whether to enable Vulkan validation layers or not * * @return A new Vulkan instance * @throw vk::Exception on failure @@ -26,6 +25,6 @@ namespace Vulkan { [[nodiscard]] vk::Instance CreateInstance( const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, - bool enable_debug_utils = false, bool enable_layers = false); + bool enable_validation = false); } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 861767c13..61be1fce1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCreateGraphicsPipelines); X(vkCreateImage); X(vkCreateImageView); + X(vkCreatePipelineCache); X(vkCreatePipelineLayout); X(vkCreateQueryPool); X(vkCreateRenderPass); @@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkDestroyImage); X(vkDestroyImageView); X(vkDestroyPipeline); + X(vkDestroyPipelineCache); X(vkDestroyPipelineLayout); X(vkDestroyQueryPool); X(vkDestroyRenderPass); @@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkGetEventStatus); X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); + X(vkGetPipelineCacheData); X(vkGetMemoryFdKHR); #ifdef _WIN32 X(vkGetMemoryWin32HandleKHR); @@ -431,6 +434,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex dld.vkDestroyPipeline(device, handle, nullptr); } +void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipelineCache(device, handle, nullptr); +} + void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { dld.vkDestroyPipelineLayout(device, handle, nullptr); } @@ -651,6 +658,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); } +void PipelineCache::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name); +} + void Semaphore::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); } @@ -746,21 +757,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout( return DescriptorSetLayout(object, handle, *dld); } +PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const { + VkPipelineCache cache; + Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache)); + return PipelineCache(cache, handle, *dld); +} + PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { VkPipelineLayout object; Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); return PipelineLayout(object, handle, *dld); } -Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { +Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, + VkPipelineCache cache) const { VkPipeline object; - Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); + Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object)); return Pipeline(object, handle, *dld); } -Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { +Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci, + VkPipelineCache cache) const { VkPipeline object; - Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); + Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object)); return Pipeline(object, handle, *dld); } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index accfad8c1..412779b51 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{}; PFN_vkCreateImage vkCreateImage{}; PFN_vkCreateImageView vkCreateImageView{}; + PFN_vkCreatePipelineCache vkCreatePipelineCache{}; PFN_vkCreatePipelineLayout vkCreatePipelineLayout{}; PFN_vkCreateQueryPool vkCreateQueryPool{}; PFN_vkCreateRenderPass vkCreateRenderPass{}; @@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkDestroyImage vkDestroyImage{}; PFN_vkDestroyImageView vkDestroyImageView{}; PFN_vkDestroyPipeline vkDestroyPipeline{}; + PFN_vkDestroyPipelineCache vkDestroyPipelineCache{}; PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{}; PFN_vkDestroyQueryPool vkDestroyQueryPool{}; PFN_vkDestroyRenderPass vkDestroyRenderPass{}; @@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkGetEventStatus vkGetEventStatus{}; PFN_vkGetFenceStatus vkGetFenceStatus{}; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; + PFN_vkGetPipelineCacheData vkGetPipelineCacheData{}; PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; #ifdef _WIN32 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; @@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; @@ -773,6 +777,18 @@ public: void SetObjectNameEXT(const char* name) const; }; +class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> { + using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + + VkResult Read(size_t* size, void* data) const noexcept { + return dld->vkGetPipelineCacheData(owner, handle, size, data); + } +}; + class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; @@ -844,11 +860,15 @@ public: DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; + PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const; + PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; - Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; + Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, + VkPipelineCache cache = nullptr) const; - Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; + Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci, + VkPipelineCache cache = nullptr) const; Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9425b5bd..0db62baa3 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -690,6 +690,7 @@ void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); ReadGlobalSetting(Settings::values.renderer_backend); + ReadGlobalSetting(Settings::values.renderer_force_max_clock); ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); @@ -709,6 +710,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_pessimistic_flushes); + ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1305,6 +1307,9 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), static_cast<u32>(Settings::values.renderer_backend.GetDefault()), Settings::values.renderer_backend.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)), + static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault())); WriteGlobalSetting(Settings::values.vulkan_device); WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), @@ -1348,6 +1353,7 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_pessimistic_flushes); + WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 01f074699..fdf8485ce 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -25,10 +25,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); + ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); + ui->use_vulkan_driver_pipeline_cache->setChecked( + Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setCurrentIndex( @@ -37,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { Settings::values.max_anisotropy.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); + ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock, + &Settings::values.renderer_force_max_clock); ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, &Settings::values.max_anisotropy); ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, @@ -48,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, + ui->renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); @@ -58,6 +66,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ui->use_fast_gpu_time, use_fast_gpu_time); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, ui->use_pessimistic_flushes, use_pessimistic_flushes); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, + ui->use_vulkan_driver_pipeline_cache, + use_vulkan_driver_pipeline_cache); } void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -76,18 +87,25 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { // Disable if not global (only happens during game) if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); + ui->renderer_force_max_clock->setEnabled( + Settings::values.renderer_force_max_clock.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); ui->use_pessimistic_flushes->setEnabled( Settings::values.use_pessimistic_flushes.UsingGlobal()); + ui->use_vulkan_driver_pipeline_cache->setEnabled( + Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); return; } + ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, + Settings::values.renderer_force_max_clock, + renderer_force_max_clock); ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, @@ -97,6 +115,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, Settings::values.use_pessimistic_flushes, use_pessimistic_flushes); + ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, + Settings::values.use_vulkan_driver_pipeline_cache, + use_vulkan_driver_pipeline_cache); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 12e816905..df557d585 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -36,10 +36,12 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; + ConfigurationShared::CheckState renderer_force_max_clock; ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_pessimistic_flushes; + ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 87a121471..061885e30 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -70,6 +70,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="renderer_force_max_clock"> + <property name="toolTip"> + <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> + </property> + <property name="text"> + <string>Force maximum clocks (Vulkan only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_vsync"> <property name="toolTip"> <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> @@ -110,6 +120,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> + <property name="toolTip"> + <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> + </property> + <property name="text"> + <string>Use Vulkan pipeline cache</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="af_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 524650144..c55f81c2f 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2229,8 +2229,10 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } switch (target) { - case GameListRemoveTarget::GlShaderCache: case GameListRemoveTarget::VkShaderCache: + RemoveVulkanDriverPipelineCache(program_id); + [[fallthrough]]; + case GameListRemoveTarget::GlShaderCache: RemoveTransferableShaderCache(program_id, target); break; case GameListRemoveTarget::AllShaderCache: @@ -2271,6 +2273,22 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTa } } +void GMainWindow::RemoveVulkanDriverPipelineCache(u64 program_id) { + static constexpr std::string_view target_file_name = "vulkan_pipelines.bin"; + + const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); + const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); + const auto target_file = shader_cache_folder_path / target_file_name; + + if (!Common::FS::Exists(target_file)) { + return; + } + if (!Common::FS::RemoveFile(target_file)) { + QMessageBox::warning(this, tr("Error Removing Vulkan Driver Pipeline Cache"), + tr("Failed to remove the driver pipeline cache.")); + } +} + void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index db318485d..f25ce65a8 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -347,6 +347,7 @@ private: void RemoveUpdateContent(u64 program_id, InstalledEntryType type); void RemoveAddOnContent(u64 program_id, InstalledEntryType type); void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); + void RemoveVulkanDriverPipelineCache(u64 program_id); void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 1e45e57bc..527017282 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -296,6 +296,7 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); + ReadSetting("Renderer", Settings::values.renderer_force_max_clock); ReadSetting("Renderer", Settings::values.renderer_debug); ReadSetting("Renderer", Settings::values.renderer_shader_feedback); ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); @@ -321,6 +322,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); + ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green); |