summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp719
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp2
4 files changed, 104 insertions, 649 deletions
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85e21f611..e362d13c5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -23,7 +23,7 @@
namespace Vulkan {
struct GraphicsPipelineCacheKey {
- std::array<u128, 6> unique_hashes;
+ std::array<u64, 6> unique_hashes;
FixedPipelineState state;
size_t Hash() const noexcept;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9d9729022..0822862fe 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -11,7 +11,8 @@
#include "common/bit_cast.h"
#include "common/cityhash.h"
-#include "common/file_util.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/microprofile.h"
#include "common/thread_worker.h"
#include "core/core.h"
@@ -36,6 +37,7 @@
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -43,449 +45,19 @@
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
-template <typename Container>
-auto MakeSpan(Container& container) {
- return std::span(container.data(), container.size());
-}
-
-static u64 MakeCbufKey(u32 index, u32 offset) {
- return (static_cast<u64>(index) << 32) | offset;
-}
-
-class GenericEnvironment : public Shader::Environment {
-public:
- explicit GenericEnvironment() = default;
- explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
- u32 start_address_)
- : gpu_memory{&gpu_memory_}, program_base{program_base_} {
- start_address = start_address_;
- }
-
- ~GenericEnvironment() override = default;
-
- u32 TextureBoundBuffer() const final {
- return texture_bound;
- }
-
- u32 LocalMemorySize() const final {
- return local_memory_size;
- }
-
- u32 SharedMemorySize() const final {
- return shared_memory_size;
- }
-
- std::array<u32, 3> WorkgroupSize() const final {
- return workgroup_size;
- }
-
- u64 ReadInstruction(u32 address) final {
- read_lowest = std::min(read_lowest, address);
- read_highest = std::max(read_highest, address);
-
- if (address >= cached_lowest && address < cached_highest) {
- return code[(address - cached_lowest) / INST_SIZE];
- }
- has_unbound_instructions = true;
- return gpu_memory->Read<u64>(program_base + address);
- }
-
- std::optional<u128> Analyze() {
- const std::optional<u64> size{TryFindSize()};
- if (!size) {
- return std::nullopt;
- }
- cached_lowest = start_address;
- cached_highest = start_address + static_cast<u32>(*size);
- return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
- }
-
- void SetCachedSize(size_t size_bytes) {
- cached_lowest = start_address;
- cached_highest = start_address + static_cast<u32>(size_bytes);
- code.resize(CachedSize());
- gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
- }
-
- [[nodiscard]] size_t CachedSize() const noexcept {
- return cached_highest - cached_lowest + INST_SIZE;
- }
-
- [[nodiscard]] size_t ReadSize() const noexcept {
- return read_highest - read_lowest + INST_SIZE;
- }
-
- [[nodiscard]] bool CanBeSerialized() const noexcept {
- return !has_unbound_instructions;
- }
-
- [[nodiscard]] u128 CalculateHash() const {
- const size_t size{ReadSize()};
- const auto data{std::make_unique<char[]>(size)};
- gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
- return Common::CityHash128(data.get(), size);
- }
-
- void Serialize(std::ofstream& file) const {
- const u64 code_size{static_cast<u64>(CachedSize())};
- const u64 num_texture_types{static_cast<u64>(texture_types.size())};
- const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
-
- file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
- .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
- .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
- .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
- .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
- .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
- .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
- .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
- .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
- .write(reinterpret_cast<const char*>(code.data()), code_size);
- for (const auto [key, type] : texture_types) {
- file.write(reinterpret_cast<const char*>(&key), sizeof(key))
- .write(reinterpret_cast<const char*>(&type), sizeof(type));
- }
- for (const auto [key, type] : cbuf_values) {
- file.write(reinterpret_cast<const char*>(&key), sizeof(key))
- .write(reinterpret_cast<const char*>(&type), sizeof(type));
- }
- if (stage == Shader::Stage::Compute) {
- file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
- .write(reinterpret_cast<const char*>(&shared_memory_size),
- sizeof(shared_memory_size));
- } else {
- file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
- }
- }
-
-protected:
- static constexpr size_t INST_SIZE = sizeof(u64);
-
- std::optional<u64> TryFindSize() {
- constexpr size_t BLOCK_SIZE = 0x1000;
- constexpr size_t MAXIMUM_SIZE = 0x100000;
-
- constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
- constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
-
- GPUVAddr guest_addr{program_base + start_address};
- size_t offset{0};
- size_t size{BLOCK_SIZE};
- while (size <= MAXIMUM_SIZE) {
- code.resize(size / INST_SIZE);
- u64* const data = code.data() + offset / INST_SIZE;
- gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
- for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
- const u64 inst = data[index / INST_SIZE];
- if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
- return offset + index;
- }
- }
- guest_addr += BLOCK_SIZE;
- size += BLOCK_SIZE;
- offset += BLOCK_SIZE;
- }
- return std::nullopt;
- }
-
- Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
- u32 raw) {
- const TextureHandle handle{raw, via_header_index};
- const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
- Tegra::Texture::TICEntry entry;
- gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
-
- const Shader::TextureType result{[&] {
- switch (entry.texture_type) {
- case Tegra::Texture::TextureType::Texture1D:
- return Shader::TextureType::Color1D;
- case Tegra::Texture::TextureType::Texture2D:
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- return Shader::TextureType::Color2D;
- case Tegra::Texture::TextureType::Texture3D:
- return Shader::TextureType::Color3D;
- case Tegra::Texture::TextureType::TextureCubemap:
- return Shader::TextureType::ColorCube;
- case Tegra::Texture::TextureType::Texture1DArray:
- return Shader::TextureType::ColorArray1D;
- case Tegra::Texture::TextureType::Texture2DArray:
- return Shader::TextureType::ColorArray2D;
- case Tegra::Texture::TextureType::Texture1DBuffer:
- return Shader::TextureType::Buffer;
- case Tegra::Texture::TextureType::TextureCubeArray:
- return Shader::TextureType::ColorArrayCube;
- default:
- throw Shader::NotImplementedException("Unknown texture type");
- }
- }()};
- texture_types.emplace(raw, result);
- return result;
- }
-
- Tegra::MemoryManager* gpu_memory{};
- GPUVAddr program_base{};
-
- std::vector<u64> code;
- std::unordered_map<u32, Shader::TextureType> texture_types;
- std::unordered_map<u64, u32> cbuf_values;
-
- u32 local_memory_size{};
- u32 texture_bound{};
- u32 shared_memory_size{};
- std::array<u32, 3> workgroup_size{};
-
- u32 read_lowest = std::numeric_limits<u32>::max();
- u32 read_highest = 0;
-
- u32 cached_lowest = std::numeric_limits<u32>::max();
- u32 cached_highest = 0;
-
- bool has_unbound_instructions = false;
-};
-
namespace {
using Shader::Backend::SPIRV::EmitSPIRV;
using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
-// TODO: Move this to a separate file
-constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
-constexpr u32 CACHE_VERSION{2};
-
-class GraphicsEnvironment final : public GenericEnvironment {
-public:
- explicit GraphicsEnvironment() = default;
- explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
- GPUVAddr program_base_, u32 start_address_)
- : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
- gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- stage = Shader::Stage::VertexA;
- stage_index = 0;
- break;
- case Maxwell::ShaderProgram::VertexB:
- stage = Shader::Stage::VertexB;
- stage_index = 0;
- break;
- case Maxwell::ShaderProgram::TesselationControl:
- stage = Shader::Stage::TessellationControl;
- stage_index = 1;
- break;
- case Maxwell::ShaderProgram::TesselationEval:
- stage = Shader::Stage::TessellationEval;
- stage_index = 2;
- break;
- case Maxwell::ShaderProgram::Geometry:
- stage = Shader::Stage::Geometry;
- stage_index = 3;
- break;
- case Maxwell::ShaderProgram::Fragment:
- stage = Shader::Stage::Fragment;
- stage_index = 4;
- break;
- default:
- UNREACHABLE_MSG("Invalid program={}", program);
- break;
- }
- const u64 local_size{sph.LocalMemorySize()};
- ASSERT(local_size <= std::numeric_limits<u32>::max());
- local_memory_size = static_cast<u32>(local_size);
- texture_bound = maxwell3d->regs.tex_cb_index;
- }
-
- ~GraphicsEnvironment() override = default;
-
- u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
- const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
- ASSERT(cbuf.enabled);
- u32 value{};
- if (cbuf_offset < cbuf.size) {
- value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
- }
- cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
- return value;
- }
-
- Shader::TextureType ReadTextureType(u32 handle) override {
- const auto& regs{maxwell3d->regs};
- const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
- return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
- }
-
-private:
- Tegra::Engines::Maxwell3D* maxwell3d{};
- size_t stage_index{};
-};
-
-class ComputeEnvironment final : public GenericEnvironment {
-public:
- explicit ComputeEnvironment() = default;
- explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
- u32 start_address_)
- : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
- &kepler_compute_} {
- const auto& qmd{kepler_compute->launch_description};
- stage = Shader::Stage::Compute;
- local_memory_size = qmd.local_pos_alloc;
- texture_bound = kepler_compute->regs.tex_cb_index;
- shared_memory_size = qmd.shared_alloc;
- workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
- }
-
- ~ComputeEnvironment() override = default;
-
- u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
- const auto& qmd{kepler_compute->launch_description};
- ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
- const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
- u32 value{};
- if (cbuf_offset < cbuf.size) {
- value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
- }
- cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
- return value;
- }
-
- Shader::TextureType ReadTextureType(u32 handle) override {
- const auto& regs{kepler_compute->regs};
- const auto& qmd{kepler_compute->launch_description};
- return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
- }
-
-private:
- Tegra::Engines::KeplerCompute* kepler_compute{};
-};
-
-void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
- std::ofstream& file) {
- if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
- return;
- }
- const u32 num_envs{static_cast<u32>(envs.size())};
- file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
- for (const GenericEnvironment* const env : envs) {
- env->Serialize(file);
- }
- file.write(key.data(), key.size_bytes());
-}
-
-template <typename Key, typename Envs>
-void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) {
- try {
- std::ofstream file;
- file.exceptions(std::ifstream::failbit);
- Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
- if (!file.is_open()) {
- LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
- return;
- }
- if (file.tellp() == 0) {
- file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
- .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
- }
- const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
- SerializePipeline(key_span, MakeSpan(envs), file);
-
- } catch (const std::ios_base::failure& e) {
- LOG_ERROR(Common_Filesystem, "{}", e.what());
- if (!Common::FS::Delete(filename)) {
- LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename);
- }
- }
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
}
-class FileEnvironment final : public Shader::Environment {
-public:
- void Deserialize(std::ifstream& file) {
- u64 code_size{};
- u64 num_texture_types{};
- u64 num_cbuf_values{};
- file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
- .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
- .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
- .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
- .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
- .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
- .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
- .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
- .read(reinterpret_cast<char*>(&stage), sizeof(stage));
- code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
- file.read(reinterpret_cast<char*>(code.get()), code_size);
- for (size_t i = 0; i < num_texture_types; ++i) {
- u32 key;
- Shader::TextureType type;
- file.read(reinterpret_cast<char*>(&key), sizeof(key))
- .read(reinterpret_cast<char*>(&type), sizeof(type));
- texture_types.emplace(key, type);
- }
- for (size_t i = 0; i < num_cbuf_values; ++i) {
- u64 key;
- u32 value;
- file.read(reinterpret_cast<char*>(&key), sizeof(key))
- .read(reinterpret_cast<char*>(&value), sizeof(value));
- cbuf_values.emplace(key, value);
- }
- if (stage == Shader::Stage::Compute) {
- file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
- .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
- } else {
- file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
- }
- }
-
- u64 ReadInstruction(u32 address) override {
- if (address < read_lowest || address > read_highest) {
- throw Shader::LogicError("Out of bounds address {}", address);
- }
- return code[(address - read_lowest) / sizeof(u64)];
- }
-
- u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
- const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
- if (it == cbuf_values.end()) {
- throw Shader::LogicError("Uncached read texture type");
- }
- return it->second;
- }
-
- Shader::TextureType ReadTextureType(u32 handle) override {
- const auto it{texture_types.find(handle)};
- if (it == texture_types.end()) {
- throw Shader::LogicError("Uncached read texture type");
- }
- return it->second;
- }
-
- u32 LocalMemorySize() const override {
- return local_memory_size;
- }
-
- u32 SharedMemorySize() const override {
- return shared_memory_size;
- }
-
- u32 TextureBoundBuffer() const override {
- return texture_bound;
- }
-
- std::array<u32, 3> WorkgroupSize() const override {
- return workgroup_size;
- }
-
-private:
- std::unique_ptr<u64[]> code;
- std::unordered_map<u32, Shader::TextureType> texture_types;
- std::unordered_map<u64, u32> cbuf_values;
- std::array<u32, 3> workgroup_size{};
- u32 local_memory_size{};
- u32 shared_memory_size{};
- u32 texture_bound{};
- u32 read_lowest{};
- u32 read_highest{};
-};
-
Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
@@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
}
} // Anonymous namespace
-void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- if (title_id == 0) {
- return;
- }
- std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)};
- std::string base_dir{shader_dir + "/vulkan"};
- std::string transferable_dir{base_dir + "/transferable"};
- std::string precompiled_dir{base_dir + "/precompiled"};
- if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
- !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
- LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
- return;
- }
- pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);
-
- struct {
- std::mutex mutex;
- size_t total{0};
- size_t built{0};
- bool has_loaded{false};
- } state;
-
- std::ifstream file;
- Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate);
- if (!file.is_open()) {
- return;
- }
- file.exceptions(std::ifstream::failbit);
- const auto end{file.tellg()};
- file.seekg(0, std::ios::beg);
-
- std::array<char, 8> magic_number;
- u32 cache_version;
- file.read(magic_number.data(), magic_number.size())
- .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
- if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
- file.close();
- if (Common::FS::Delete(pipeline_cache_filename)) {
- if (magic_number != MAGIC_NUMBER) {
- LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
- }
- if (cache_version != CACHE_VERSION) {
- LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
- }
- } else {
- LOG_ERROR(Render_Vulkan,
- "Invalid pipeline cache file and failed to delete it in \"{}\"",
- pipeline_cache_filename);
- }
- return;
- }
- while (file.tellg() != end) {
- if (stop_loading) {
- return;
- }
- u32 num_envs{};
- file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
- std::vector<FileEnvironment> envs(num_envs);
- for (FileEnvironment& env : envs) {
- env.Deserialize(file);
- }
- if (envs.front().ShaderStage() == Shader::Stage::Compute) {
- ComputePipelineCacheKey key;
- file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
- workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
- ShaderPools pools;
- auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
-
- std::lock_guard lock{state.mutex};
- compute_cache.emplace(key, std::move(pipeline));
- ++state.built;
- if (state.has_loaded) {
- callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
- }
- });
- } else {
- GraphicsPipelineCacheKey key;
- file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
- workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
- ShaderPools pools;
- boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
- for (auto& env : envs) {
- env_ptrs.push_back(&env);
- }
- auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
-
- std::lock_guard lock{state.mutex};
- graphics_cache.emplace(key, std::move(pipeline));
- ++state.built;
- if (state.has_loaded) {
- callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
- }
- });
- }
- ++state.total;
- }
- {
- std::lock_guard lock{state.mutex};
- callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
- state.has_loaded = true;
- }
- workers.WaitForRequests();
-}
-
size_t ComputePipelineCacheKey::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
return static_cast<size_t>(hash);
@@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
return std::memcmp(&rhs, this, Size()) == 0;
}
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_)
- : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
- scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
@@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default;
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- if (!RefreshStages()) {
+ if (!RefreshStages(graphics_key.unique_hashes)) {
current_pipeline = nullptr;
return nullptr;
}
@@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
ComputePipeline* PipelineCache::CurrentComputePipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
- const GPUVAddr shader_addr{program_base + qmd.program_start};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
- if (!cpu_shader_addr) {
- return nullptr;
- }
- const ShaderInfo* shader{TryGet(*cpu_shader_addr)};
+ const ShaderInfo* const shader{ComputeShader()};
if (!shader) {
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
- shader = MakeShaderInfo(env, *cpu_shader_addr);
+ return nullptr;
}
+ const auto& qmd{kepler_compute.launch_description};
const ComputePipelineCacheKey key{
- .unique_hash{shader->unique_hash},
- .shared_memory_size{qmd.shared_alloc},
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
@@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
return pipeline.get();
}
-bool PipelineCache::RefreshStages() {
- auto& dirty{maxwell3d.dirty.flags};
- if (!dirty[VideoCommon::Dirty::Shaders]) {
- return last_valid_shaders;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- dirty[VideoCommon::Dirty::Shaders] = false;
-
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
- for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- graphics_key.unique_hashes[index] = u128{};
- continue;
- }
- const auto& shader_config{maxwell3d.regs.shader_config[index]};
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
- const GPUVAddr shader_addr{base_addr + shader_config.offset};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
- if (!cpu_shader_addr) {
- LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
- last_valid_shaders = false;
- return false;
- }
- const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
- if (!shader_info) {
- const u32 start_address{shader_config.offset};
- GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
- shader_info = MakeShaderInfo(env, *cpu_shader_addr);
- }
- shader_infos[index] = shader_info;
- graphics_key.unique_hashes[index] = shader_info->unique_hash;
+ auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ auto base_dir{shader_dir / "vulkan"};
+ auto transferable_dir{base_dir / "transferable"};
+ auto precompiled_dir{base_dir / "precompiled"};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
+ !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+ return;
}
- last_valid_shaders = true;
- return true;
-}
+ pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id);
-const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
- auto info = std::make_unique<ShaderInfo>();
- if (const std::optional<u128> cached_hash{env.Analyze()}) {
- info->unique_hash = *cached_hash;
- info->size_bytes = env.CachedSize();
- } else {
- // Slow path, not really hit on commercial games
- // Build a control flow graph to get the real shader size
- main_pools.flow_block.ReleaseContents();
- Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()};
- info->unique_hash = env.CalculateHash();
- info->size_bytes = env.ReadSize();
- }
- const size_t size_bytes{info->size_bytes};
- const ShaderInfo* const result{info.get()};
- Register(std::move(info), cpu_addr, size_bytes);
- return result;
+ struct {
+ std::mutex mutex;
+ size_t total{0};
+ size_t built{0};
+ bool has_loaded{false};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+ ShaderPools pools;
+ auto pipeline{CreateComputePipeline(pools, key, env, false)};
+
+ std::lock_guard lock{state.mutex};
+ compute_cache.emplace(key, std::move(pipeline));
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+ ShaderPools pools;
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
+
+ std::lock_guard lock{state.mutex};
+ graphics_cache.emplace(key, std::move(pipeline));
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);
+
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
+
+ workers.WaitForRequests();
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
@@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
size_t env_index{0};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (key.unique_hashes[index] == u128{}) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
Shader::Environment& env{*envs[env_index]};
@@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
u32 binding{0};
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (key.unique_hashes[index] == u128{}) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
UNIMPLEMENTED_IF(index == 0);
@@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
device.SaveShader(code);
modules[stage_index] = BuildShader(device, code);
if (device.HasDebuggingToolAttached()) {
- const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0],
- key.unique_hashes[index][1])};
+ const std::string name{fmt::format("{:016x}", key.unique_hashes[index])};
modules[stage_index].SetObjectNameEXT(name.c_str());
}
}
@@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (graphics_key.unique_hashes[index] == u128{}) {
+ if (graphics_key.unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
-
envs.push_back(&env);
}
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
@@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
env_ptrs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- if (key.unique_hashes[index] != u128{}) {
+ if (key.unique_hashes[index] != 0) {
env_ptrs.push_back(&envs[index]);
}
}
- SerializePipeline(key, env_ptrs, pipeline_cache_filename);
+ VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename);
});
return pipeline;
}
@@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
if (!pipeline_cache_filename.empty()) {
serialization_thread.QueueWork([this, key, env = std::move(env)] {
- SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
- pipeline_cache_filename);
+ VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+ pipeline_cache_filename);
});
}
return pipeline;
@@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
if (device.HasDebuggingToolAttached()) {
- const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
+ const auto name{fmt::format("{:016x}", key.unique_hash)};
spv_module.SetObjectNameEXT(name.c_str());
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
@@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
Shader::Profile profile{base_profile};
const Shader::Stage stage{program.stage};
- const bool has_geometry{key.unique_hashes[4] != u128{}};
+ const bool has_geometry{key.unique_hashes[4] != 0};
const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
const float point_size{Common::BitCast<float>(key.state.point_size)};
switch (stage) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eec17d3fd..4e48b4956 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
+#include <filesystem>
#include <iosfwd>
#include <memory>
#include <type_traits>
@@ -42,7 +43,7 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
- u128 unique_hash;
+ u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
@@ -76,16 +77,12 @@ namespace Vulkan {
class ComputePipeline;
class Device;
class DescriptorPool;
-class GenericEnvironment;
class RasterizerVulkan;
class RenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
-struct ShaderInfo {
- u128 unique_hash{};
- size_t size_bytes{};
-};
+using VideoCommon::ShaderInfo;
struct ShaderPools {
void ReleaseContents() {
@@ -99,17 +96,16 @@ struct ShaderPools {
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
-class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
+class PipelineCache : public VideoCommon::ShaderCache {
public:
- explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d,
+ explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const Device& device,
VKScheduler& scheduler, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache);
- ~PipelineCache() override;
+ ~PipelineCache();
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
@@ -119,10 +115,6 @@ public:
const VideoCore::DiskResourceLoadCallback& callback);
private:
- bool RefreshStages();
-
- const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
-
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
@@ -140,11 +132,6 @@ private:
Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
const Shader::IR::Program& program);
- Tegra::GPU& gpu;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
-
const Device& device;
VKScheduler& scheduler;
DescriptorPool& descriptor_pool;
@@ -156,16 +143,13 @@ private:
GraphicsPipelineCacheKey graphics_key{};
GraphicsPipeline* current_pipeline{};
- std::array<const ShaderInfo*, 6> shader_infos{};
- bool last_valid_shaders{};
-
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
ShaderPools main_pools;
Shader::Profile base_profile;
- std::string pipeline_cache_filename;
+ std::filesystem::path pipeline_cache_filename;
Common::ThreadWorker workers;
Common::ThreadWorker serialization_thread;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7df169c85..fa6daeb3a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
texture_cache),
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache },