1 files changed, 518 insertions, 349 deletions
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <fstream>
 #include <memory>
+#include <thread>
 #include <vector>
 
 #include "common/bit_cast.h"
 #include "common/cityhash.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/microprofile.h"
+#include "common/thread_worker.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/program_header.h"
+#include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
-
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
 namespace {
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+    return std::span(container.data(), container.size());
+}
 
-constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
-constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
-constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-
-constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
-    .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
-    .disable_else_derivation = true,
-};
-
-constexpr std::size_t GetStageFromProgram(std::size_t program) {
-    return program == 0 ? 0 : program - 1;
+Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
+    switch (comparison) {
+    case Maxwell::ComparisonOp::Never:
+    case Maxwell::ComparisonOp::NeverOld:
+        return Shader::CompareFunction::Never;
+    case Maxwell::ComparisonOp::Less:
+    case Maxwell::ComparisonOp::LessOld:
+        return Shader::CompareFunction::Less;
+    case Maxwell::ComparisonOp::Equal:
+    case Maxwell::ComparisonOp::EqualOld:
+        return Shader::CompareFunction::Equal;
+    case Maxwell::ComparisonOp::LessEqual:
+    case Maxwell::ComparisonOp::LessEqualOld:
+        return Shader::CompareFunction::LessThanEqual;
+    case Maxwell::ComparisonOp::Greater:
+    case Maxwell::ComparisonOp::GreaterOld:
+        return Shader::CompareFunction::Greater;
+    case Maxwell::ComparisonOp::NotEqual:
+    case Maxwell::ComparisonOp::NotEqualOld:
+        return Shader::CompareFunction::NotEqual;
+    case Maxwell::ComparisonOp::GreaterEqual:
+    case Maxwell::ComparisonOp::GreaterEqualOld:
+        return Shader::CompareFunction::GreaterThanEqual;
+    case Maxwell::ComparisonOp::Always:
+    case Maxwell::ComparisonOp::AlwaysOld:
+        return Shader::CompareFunction::Always;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
+    return {};
 }
 
-constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
-    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
+    if (attr.enabled == 0) {
+        return Shader::AttributeType::Disabled;
+    }
+    switch (attr.Type()) {
+    case Maxwell::VertexAttribute::Type::SignedNorm:
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+    case Maxwell::VertexAttribute::Type::SignedScaled:
+    case Maxwell::VertexAttribute::Type::Float:
+        return Shader::AttributeType::Float;
+    case Maxwell::VertexAttribute::Type::SignedInt:
+        return Shader::AttributeType::SignedInt;
+    case Maxwell::VertexAttribute::Type::UnsignedInt:
+        return Shader::AttributeType::UnsignedInt;
+    }
+    return Shader::AttributeType::Float;
 }
 
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
-    switch (program) {
-    case Maxwell::ShaderProgram::VertexB:
-        return ShaderType::Vertex;
-    case Maxwell::ShaderProgram::TesselationControl:
-        return ShaderType::TesselationControl;
-    case Maxwell::ShaderProgram::TesselationEval:
-        return ShaderType::TesselationEval;
-    case Maxwell::ShaderProgram::Geometry:
-        return ShaderType::Geometry;
-    case Maxwell::ShaderProgram::Fragment:
-        return ShaderType::Fragment;
-    default:
-        UNIMPLEMENTED_MSG("program={}", program);
-        return ShaderType::Vertex;
+Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
+    switch (state.DynamicAttributeType(index)) {
+    case 0:
+        return Shader::AttributeType::Disabled;
+    case 1:
+        return Shader::AttributeType::Float;
+    case 2:
+        return Shader::AttributeType::SignedInt;
+    case 3:
+        return Shader::AttributeType::UnsignedInt;
     }
+    return Shader::AttributeType::Disabled;
 }
 
-template <VkDescriptorType descriptor_type, class Container>
-void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
-                 VkShaderStageFlags stage_flags, const Container& container) {
-    const u32 num_entries = static_cast<u32>(std::size(container));
-    for (std::size_t i = 0; i < num_entries; ++i) {
-        u32 count = 1;
-        if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
-            // Combined image samplers can be arrayed.
-            count = container[i].size;
+Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
+                                    const GraphicsPipelineCacheKey& key,
+                                    const Shader::IR::Program& program,
+                                    const Shader::IR::Program* previous_program) {
+    Shader::RuntimeInfo info;
+    if (previous_program) {
+        info.previous_stage_stores = previous_program->info.stores;
+        if (previous_program->is_geometry_passthrough) {
+            info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
         }
-        bindings.push_back({
-            .binding = binding++,
-            .descriptorType = descriptor_type,
-            .descriptorCount = count,
-            .stageFlags = stage_flags,
-            .pImmutableSamplers = nullptr,
-        });
+    } else {
+        info.previous_stage_stores.mask.set();
+    }
+    const Shader::Stage stage{program.stage};
+    const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
+    const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
+    const float point_size{Common::BitCast<float>(key.state.point_size)};
+    switch (stage) {
+    case Shader::Stage::VertexB:
+        if (!has_geometry) {
+            if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
+                info.fixed_state_point_size = point_size;
+            }
+            if (key.state.xfb_enabled) {
+                info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+            }
+            info.convert_depth_mode = gl_ndc;
+        }
+        if (key.state.dynamic_vertex_input) {
+            for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+                info.generic_input_types[index] = AttributeType(key.state, index);
+            }
+        } else {
+            std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
+                                   &CastAttributeType);
+        }
+        break;
+    case Shader::Stage::TessellationEval:
+        // We have to flip tessellation clockwise for some reason...
+        info.tess_clockwise = key.state.tessellation_clockwise == 0;
+        info.tess_primitive = [&key] {
+            const u32 raw{key.state.tessellation_primitive.Value()};
+            switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
+            case Maxwell::TessellationPrimitive::Isolines:
+                return Shader::TessPrimitive::Isolines;
+            case Maxwell::TessellationPrimitive::Triangles:
+                return Shader::TessPrimitive::Triangles;
+            case Maxwell::TessellationPrimitive::Quads:
+                return Shader::TessPrimitive::Quads;
+            }
+            UNREACHABLE();
+            return Shader::TessPrimitive::Triangles;
+        }();
+        info.tess_spacing = [&] {
+            const u32 raw{key.state.tessellation_spacing};
+            switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
+            case Maxwell::TessellationSpacing::Equal:
+                return Shader::TessSpacing::Equal;
+            case Maxwell::TessellationSpacing::FractionalOdd:
+                return Shader::TessSpacing::FractionalOdd;
+            case Maxwell::TessellationSpacing::FractionalEven:
+                return Shader::TessSpacing::FractionalEven;
+            }
+            UNREACHABLE();
+            return Shader::TessSpacing::Equal;
+        }();
+        break;
+    case Shader::Stage::Geometry:
+        if (program.output_topology == Shader::OutputTopology::PointList) {
+            info.fixed_state_point_size = point_size;
+        }
+        if (key.state.xfb_enabled != 0) {
+            info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+        }
+        info.convert_depth_mode = gl_ndc;
+        break;
+    case Shader::Stage::Fragment:
+        info.alpha_test_func = MaxwellToCompareFunction(
+            key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
+        info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
+        break;
+    default:
+        break;
+    }
+    switch (key.state.topology) {
+    case Maxwell::PrimitiveTopology::Points:
+        info.input_topology = Shader::InputTopology::Points;
+        break;
+    case Maxwell::PrimitiveTopology::Lines:
+    case Maxwell::PrimitiveTopology::LineLoop:
+    case Maxwell::PrimitiveTopology::LineStrip:
+        info.input_topology = Shader::InputTopology::Lines;
+        break;
+    case Maxwell::PrimitiveTopology::Triangles:
+    case Maxwell::PrimitiveTopology::TriangleStrip:
+    case Maxwell::PrimitiveTopology::TriangleFan:
+    case Maxwell::PrimitiveTopology::Quads:
+    case Maxwell::PrimitiveTopology::QuadStrip:
+    case Maxwell::PrimitiveTopology::Polygon:
+    case Maxwell::PrimitiveTopology::Patches:
+        info.input_topology = Shader::InputTopology::Triangles;
+        break;
+    case Maxwell::PrimitiveTopology::LinesAdjacency:
+    case Maxwell::PrimitiveTopology::LineStripAdjacency:
+        info.input_topology = Shader::InputTopology::LinesAdjacency;
+        break;
+    case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+    case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+        info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+        break;
     }
+    info.force_early_z = key.state.early_z != 0;
+    info.y_negate = key.state.y_negate != 0;
+    return info;
 }
+} // Anonymous namespace
 
-u32 FillDescriptorLayout(const ShaderEntries& entries,
-                         std::vector<VkDescriptorSetLayoutBinding>& bindings,
-                         Maxwell::ShaderProgram program_type, u32 base_binding) {
-    const ShaderType stage = GetStageFromProgram(program_type);
-    const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
-
-    u32 binding = base_binding;
-    AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
-    AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
-    AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
-    AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
-    AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
-    AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
-    return binding;
+size_t ComputePipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<size_t>(hash);
 }
 
-} // Anonymous namespace
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
 
-std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+size_t GraphicsPipelineCacheKey::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
-    return static_cast<std::size_t>(hash);
+    return static_cast<size_t>(hash);
 }
 
 bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
     return std::memcmp(&rhs, this, Size()) == 0;
 }
 
-std::size_t ComputePipelineCacheKey::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
-    return static_cast<std::size_t>(hash);
-}
-
-bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
-    return std::memcmp(&rhs, this, sizeof *this) == 0;
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                             Tegra::Engines::KeplerCompute& kepler_compute_,
+                             Tegra::MemoryManager& gpu_memory_, const Device& device_,
+                             VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
+                             VKUpdateDescriptorQueue& update_descriptor_queue_,
+                             RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
+                             TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
+      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+      use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
+      workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
+      serialization_thread(1, "yuzu:PipelineSerialization") {
+    const auto& float_control{device.FloatControlProperties()};
+    const VkDriverIdKHR driver_id{device.GetDriverID()};
+    profile = Shader::Profile{
+        .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
+        .unified_descriptor_binding = true,
+        .support_descriptor_aliasing = true,
+        .support_int8 = true,
+        .support_int16 = device.IsShaderInt16Supported(),
+        .support_int64 = device.IsShaderInt64Supported(),
+        .support_vertex_instance_id = false,
+        .support_float_controls = true,
+        .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
+                                            VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+        .support_separate_rounding_mode =
+            float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+        .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
+        .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
+        .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
+        .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
+        .support_fp16_signed_zero_nan_preserve =
+            float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
+        .support_fp32_signed_zero_nan_preserve =
+            float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
+        .support_fp64_signed_zero_nan_preserve =
+            float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+        .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
+        .support_vote = true,
+        .support_viewport_index_layer_non_geometry =
+            device.IsExtShaderViewportIndexLayerSupported(),
+        .support_viewport_mask = device.IsNvViewportArray2Supported(),
+        .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
+        .support_demote_to_helper_invocation = true,
+        .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
+        .support_derivative_control = true,
+        .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
+
+        .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
+
+        .lower_left_origin_mode = false,
+        .need_declared_frag_colors = false,
+
+        .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
+        .has_broken_unsigned_image_offsets = false,
+        .has_broken_signed_operations = false,
+        .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
+        .ignore_nan_fp_comparisons = false,
+    };
+    host_info = Shader::HostTranslateInfo{
+        .support_float16 = device.IsFloat16Supported(),
+        .support_int64 = device.IsShaderInt64Supported(),
+    };
 }
 
-Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
-               GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
-    : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
-      shader_ir(program_code, main_offset_, compiler_settings, registry),
-      entries(GenerateShaderEntries(shader_ir)) {}
-
-Shader::~Shader() = default;
-
-VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
-                                 Tegra::Engines::Maxwell3D& maxwell3d_,
-                                 Tegra::Engines::KeplerCompute& kepler_compute_,
-                                 Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                                 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
-                                 VKUpdateDescriptorQueue& update_descriptor_queue_)
-    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
-      scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
-                                                                    update_descriptor_queue_} {}
-
-VKPipelineCache::~VKPipelineCache() = default;
+PipelineCache::~PipelineCache() = default;
 
-std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
-    std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
-
-    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
-        // Skip stages that are not enabled
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
-            continue;
-        }
-
-        const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        ASSERT(cpu_addr);
-
-        Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
-        if (!result) {
-            const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
-
-            // No shader found - create a new one
-            static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
-            const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
-            ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
-            const std::size_t size_in_bytes = code.size() * sizeof(u64);
-
-            auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
-                                                   std::move(code), stage_offset);
-            result = shader.get();
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-            if (cpu_addr) {
-                Register(std::move(shader), *cpu_addr, size_in_bytes);
-            } else {
-                null_shader = std::move(shader);
-            }
+    if (!RefreshStages(graphics_key.unique_hashes)) {
+        current_pipeline = nullptr;
+        return nullptr;
+    }
+    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+                               device.IsExtVertexInputDynamicStateSupported());
+
+    if (current_pipeline) {
+        GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
+        if (next) {
+            current_pipeline = next;
+            return BuiltPipeline(current_pipeline);
         }
-        shaders[index] = result;
     }
-    return last_shaders = shaders;
+    return CurrentGraphicsPipelineSlowPath();
 }
 
-VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
-    const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
-    VideoCommon::Shader::AsyncShaders& async_shaders) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    if (last_graphics_pipeline && last_graphics_key == key) {
-        return last_graphics_pipeline;
-    }
-    last_graphics_key = key;
-
-    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
-        std::unique_lock lock{pipeline_cache};
-        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
-        if (is_cache_miss) {
-            gpu.ShaderNotify().MarkSharderBuilding();
-            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-            const auto [program, bindings] = DecompileShaders(key.fixed_state);
-            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
-                                            update_descriptor_queue, bindings, program, key,
-                                            num_color_buffers);
-        }
-        last_graphics_pipeline = pair->second.get();
-        return last_graphics_pipeline;
+    const ShaderInfo* const shader{ComputeShader()};
+    if (!shader) {
+        return nullptr;
     }
-
-    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
-    auto& entry = pair->second;
-    if (is_cache_miss) {
-        gpu.ShaderNotify().MarkSharderBuilding();
-        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-        const auto [program, bindings] = DecompileShaders(key.fixed_state);
-        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
-                                                     update_descriptor_queue, key, bindings,
-                                                     program, num_color_buffers);
-        gpu.ShaderNotify().MarkShaderComplete();
+    const auto& qmd{kepler_compute.launch_description};
+    const ComputePipelineCacheKey key{
+        .unique_hash = shader->unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
+        .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+    };
+    const auto [pair, is_new]{compute_cache.try_emplace(key)};
+    auto& pipeline{pair->second};
+    if (!is_new) {
+        return pipeline.get();
     }
-    last_graphics_pipeline = entry.get();
-    return last_graphics_pipeline;
+    pipeline = CreateComputePipeline(key, shader);
+    return pipeline.get();
 }
 
-VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
-    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
-
-    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
-    auto& entry = pair->second;
-    if (!is_cache_miss) {
-        return *entry;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    if (title_id == 0) {
+        return;
     }
-    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-
-    const GPUVAddr gpu_addr = key.shader;
-
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
+    const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+    const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+        return;
+    }
+    pipeline_cache_filename = base_dir / "vulkan.bin";
+
+    struct {
+        std::mutex mutex;
+        size_t total{};
+        size_t built{};
+        bool has_loaded{};
+    } state;
+
+    const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+        ComputePipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+            ShaderPools pools;
+            auto pipeline{CreateComputePipeline(pools, key, env, false)};
+            std::lock_guard lock{state.mutex};
+            if (pipeline) {
+                compute_cache.emplace(key, std::move(pipeline));
+            }
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
+    const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
+    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+        GraphicsPipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
+            (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
+            return;
+        }
+        workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+            ShaderPools pools;
+            boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+            for (auto& env : envs) {
+                env_ptrs.push_back(&env);
+            }
+            auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
 
-    Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
-    if (!shader) {
-        // No shader found - create a new one
-        const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
+            std::lock_guard lock{state.mutex};
+            graphics_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+                               load_graphics);
 
-        ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
+    std::unique_lock lock{state.mutex};
+    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+    state.has_loaded = true;
+    lock.unlock();
 
-        auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
-                                                    *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
-        shader = shader_info.get();
+    workers.WaitForRequests();
+}
 
-        if (cpu_addr) {
-            Register(std::move(shader_info), *cpu_addr, size_in_bytes);
-        } else {
-            null_kernel = std::move(shader_info);
-        }
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
+    const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+    auto& pipeline{pair->second};
+    if (is_new) {
+        pipeline = CreateGraphicsPipeline();
     }
-
-    const Specialization specialization{
-        .base_binding = 0,
-        .workgroup_size = key.workgroup_size,
-        .shared_memory_size = key.shared_memory_size,
-        .point_size = std::nullopt,
-        .enabled_attributes = {},
-        .attribute_types = {},
-        .ndc_minus_one_to_one = false,
-    };
-    const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
-                                             shader->GetRegistry(), specialization),
-                                   shader->GetEntries()};
-    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
-                                                update_descriptor_queue, spirv_shader);
-    return *entry;
+    if (!pipeline) {
+        return nullptr;
+    }
+    if (current_pipeline) {
+        current_pipeline->AddTransition(pipeline.get());
+    }
+    current_pipeline = pipeline.get();
+    return BuiltPipeline(current_pipeline);
 }
 
-void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
-    gpu.ShaderNotify().MarkShaderComplete();
-    std::unique_lock lock{pipeline_cache};
-    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+    if (pipeline->IsBuilt()) {
+        return pipeline;
+    }
+    if (!use_asynchronous_shaders) {
+        return pipeline;
+    }
+    // If something is using depth, we can assume that games are not rendering anything which
+    // will be used one time.
+    if (maxwell3d.regs.zeta_enable) {
+        return nullptr;
+    }
+    // If games are using a small index count, we can assume these are full screen quads.
+    // Usually these shaders are only used once for building textures so we can assume they
+    // can't be built async
+    if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+        return pipeline;
+    }
+    return nullptr;
 }
 
-void VKPipelineCache::OnShaderRemoval(Shader* shader) {
-    bool finished = false;
-    const auto Finish = [&] {
-        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
-        // flush.
-        if (finished) {
-            return;
-        }
-        finished = true;
-        scheduler.Finish();
-    };
-
-    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
-    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
-        auto& entry = it->first;
-        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
-            entry.shaders.end()) {
-            ++it;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
+    ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+    std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+    LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+    size_t env_index{0};
+    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+    const bool uses_vertex_a{key.unique_hashes[0] != 0};
+    const bool uses_vertex_b{key.unique_hashes[1] != 0};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
-        Finish();
-        it = graphics_cache.erase(it);
+        Shader::Environment& env{*envs[env_index]};
+        ++env_index;
+
+        const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+        Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+        if (!uses_vertex_a || index != 1) {
+            // Normal path
+            programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
+        } else {
+            // VertexB path when VertexA is present.
+            auto& program_va{programs[0]};
+            auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+            programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
+        }
     }
-    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
-        auto& entry = it->first;
-        if (entry.shader != invalidated_addr) {
-            ++it;
+    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
+
+    const Shader::IR::Program* previous_stage{};
+    Shader::Backend::Bindings binding;
+    for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
+         ++index) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
-        Finish();
-        it = compute_cache.erase(it);
+        UNIMPLEMENTED_IF(index == 0);
+
+        Shader::IR::Program& program{programs[index]};
+        const size_t stage_index{index - 1};
+        infos[stage_index] = &program.info;
+
+        const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
+        const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
+        device.SaveShader(code);
+        modules[stage_index] = BuildShader(device, code);
+        if (device.HasDebuggingToolAttached()) {
+            const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
+            modules[stage_index].SetObjectNameEXT(name.c_str());
+        }
+        previous_stage = &program;
     }
+    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+    return std::make_unique<GraphicsPipeline>(
+        maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+        descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
+        std::move(modules), infos);
+
+} catch (const Shader::Exception& exception) {
+    LOG_ERROR(Render_Vulkan, "{}", exception.what());
+    return nullptr;
 }
 
-std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
-    Specialization specialization;
-    if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
-        float point_size;
-        std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
-        specialization.point_size = point_size;
-        ASSERT(point_size != 0.0f);
-    }
-    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
-        const auto& attribute = fixed_state.attributes[i];
-        specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
-        specialization.attribute_types[i] = attribute.Type();
-    }
-    specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
-    specialization.early_fragment_tests = fixed_state.early_z;
-
-    // Alpha test
-    specialization.alpha_test_func =
-        FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
-    specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
-
-    SPIRVProgram program;
-    std::vector<VkDescriptorSetLayoutBinding> bindings;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
+    GraphicsEnvironments environments;
+    GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
 
-    for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
-        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
-        // Skip stages that are not enabled
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
-            continue;
-        }
-        const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
-
-        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
-        const ShaderType program_type = GetShaderType(program_enum);
-        const auto& entries = shader->GetEntries();
-        program[stage] = {
-            Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
-            entries,
-        };
-
-        const u32 old_binding = specialization.base_binding;
-        specialization.base_binding =
-            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
-        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+    main_pools.ReleaseContents();
+    auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
+    if (!pipeline || pipeline_cache_filename.empty()) {
+        return pipeline;
     }
-    return {std::move(program), std::move(bindings)};
-}
-
-template <VkDescriptorType descriptor_type, class Container>
-void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
-              u32& offset, const Container& container) {
-    static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
-    const u32 count = static_cast<u32>(std::size(container));
-
-    if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
-        for (u32 i = 0; i < count; ++i) {
-            const u32 num_samplers = container[i].size;
-            template_entries.push_back({
-                .dstBinding = binding,
-                .dstArrayElement = 0,
-                .descriptorCount = num_samplers,
-                .descriptorType = descriptor_type,
-                .offset = offset,
-                .stride = entry_size,
-            });
-
-            ++binding;
-            offset += num_samplers * entry_size;
+    serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
+        boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
+            env_ptrs;
+        for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+            if (key.unique_hashes[index] != 0) {
+                env_ptrs.push_back(&envs[index]);
+            }
         }
-        return;
-    }
+        SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+    });
+    return pipeline;
+}
 
-    if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
-                  descriptor_type == STORAGE_TEXEL_BUFFER) {
-        // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
-        // Note: Fixed in driver Windows 443.24, Linux 440.66.15
-        for (u32 i = 0; i < count; ++i) {
-            template_entries.push_back({
-                .dstBinding = binding + i,
-                .dstArrayElement = 0,
-                .descriptorCount = 1,
-                .descriptorType = descriptor_type,
-                .offset = static_cast<std::size_t>(offset + i * entry_size),
-                .stride = entry_size,
-            });
-        }
-    } else if (count > 0) {
-        template_entries.push_back({
-            .dstBinding = binding,
-            .dstArrayElement = 0,
-            .descriptorCount = count,
-            .descriptorType = descriptor_type,
-            .offset = offset,
-            .stride = entry_size,
-        });
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+    const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    env.SetCachedSize(shader->size_bytes);
+
+    main_pools.ReleaseContents();
+    auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
+    if (!pipeline || pipeline_cache_filename.empty()) {
+        return pipeline;
     }
-    offset += count * entry_size;
-    binding += count;
+    serialization_thread.QueueWork([this, key, env = std::move(env)] {
+        SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+                          pipeline_cache_filename, CACHE_VERSION);
+    });
+    return pipeline;
 }
 
-void FillDescriptorUpdateTemplateEntries(
-    const ShaderEntries& entries, u32& binding, u32& offset,
-    std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
-    AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
-    AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
-    AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
-    AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
-    AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
-    AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+    ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
+    bool build_in_parallel) try {
+    LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+
+    Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+    auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+    const std::vector<u32> code{EmitSPIRV(profile, program)};
+    device.SaveShader(code);
+    vk::ShaderModule spv_module{BuildShader(device, code)};
+    if (device.HasDebuggingToolAttached()) {
+        const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
+        spv_module.SetObjectNameEXT(name.c_str());
+    }
+    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+    return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
+                                             thread_worker, &shader_notify, program.info,
+                                             std::move(spv_module));
+
+} catch (const Shader::Exception& exception) {
+    LOG_ERROR(Render_Vulkan, "{}", exception.what());
+    return nullptr;
 }
 
 } // namespace Vulkan