// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"

namespace Vulkan {

MICROPROFILE_DECLARE(Vulkan_PipelineCache);

using Tegra::Engines::ShaderType;

namespace {

constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
    VideoCommon::Shader::CompileDepth::FullDecompile};

/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
    const auto& gpu{system.GPU().Maxwell3D()};
    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}

/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    // Sched instructions appear once every 4 instructions.
    constexpr std::size_t SchedPeriod = 4;
    const std::size_t absolute_offset = offset - main_offset;
    return (absolute_offset % SchedPeriod) == 0;
}

/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
    // Compute kernels start at offset 0; graphics stages skip the shader header (10 words).
    const std::size_t start_offset = is_compute ? 0 : 10;
    // This is the encoded version of BRA that jumps to itself. All Nvidia shaders end with one.
    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const u64 instruction = program[offset];
        if (!IsSchedInstruction(offset, start_offset)) {
            if ((instruction & mask) == self_jumping_branch) {
                // End on Maxwell's "nop" instruction
                break;
            }
            if (instruction == 0) {
                break;
            }
        }
        ++offset;
    }
    // The last instruction is included in the program size
    return std::min(offset + 1, program.size());
}

/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
                          const u8* host_ptr, bool is_compute) {
    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
        std::fill(program_code.begin(), program_code.end(), 0);
        return program_code;
    });
    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
                                   program_code.size() * sizeof(u64));
    program_code.resize(CalculateProgramSize(program_code, is_compute));
    return program_code;
}
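
/// Maps a Maxwell shader program index to a pipeline stage index. VertexA and VertexB both
/// resolve to stage 0; every other program is shifted down by one.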
constexpr std::size_t GetStageFromProgram(std::size_t program) {
    return program == 0 ? 0 : program - 1;
}

constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}

ShaderType GetShaderType(Maxwell::ShaderProgram program) {
    switch (program) {
    case Maxwell::ShaderProgram::VertexB:
        return ShaderType::Vertex;
    case Maxwell::ShaderProgram::TesselationControl:
        return ShaderType::TesselationControl;
    case Maxwell::ShaderProgram::TesselationEval:
        return ShaderType::TesselationEval;
    case Maxwell::ShaderProgram::Geometry:
        return ShaderType::Geometry;
    case Maxwell::ShaderProgram::Fragment:
        return ShaderType::Fragment;
    default:
        UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
        return ShaderType::Vertex;
    }
}

u32 FillDescriptorLayout(const ShaderEntries& entries,
                         std::vector<vk::DescriptorSetLayoutBinding>& bindings,
                         Maxwell::ShaderProgram program_type, u32 base_binding) {
    const ShaderType stage = GetStageFromProgram(program_type);
    const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);

    u32 binding = base_binding;
    const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
        for (std::size_t i = 0; i < num_entries; ++i) {
            bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
        }
    };
    AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
    AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
    AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
    AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
    AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
    return binding;
}

} // Anonymous namespace

CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
                           ProgramCode program_code, u32 main_offset)
    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
      program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
      shader_ir{this->program_code, main_offset, compiler_settings, locker},
      entries{GenerateShaderEntries(shader_ir)} {}

CachedShader::~CachedShader() = default;

Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
    Core::System& system, Tegra::Engines::ShaderType stage) {
    if (stage == Tegra::Engines::ShaderType::Compute) {
        return system.GPU().KeplerCompute();
    } else {
        return system.GPU().Maxwell3D();
    }
}

VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                                 const VKDevice& device, VKScheduler& scheduler,
                                 VKDescriptorPool& descriptor_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue)
    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
      renderpass_cache(device) {}

VKPipelineCache::~VKPipelineCache() = default;
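
// Returns the currently bound graphics shaders. The cached result is reused while the shader
// registers are clean; otherwise each enabled stage is looked up in the cache and a new
// CachedShader is created and registered on a miss.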
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
    const auto& gpu = system.GPU().Maxwell3D();
    auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
    if (!dirty) {
        return last_shaders;
    }
    dirty = false;

    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = gpu.regs.shader_config[index];
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};

        // Skip stages that are not enabled
        if (!gpu.regs.IsShaderConfigEnabled(index)) {
            continue;
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
        const auto host_ptr{memory_manager.GetPointer(program_addr)};
        auto shader = TryGet(host_ptr);
        if (!shader) {
            // No shader found - create a new one
            constexpr u32 stage_offset = 10;
            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);

            const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
            ASSERT(cpu_addr);

            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                    host_ptr, std::move(code), stage_offset);
            Register(shader);
        }
        shaders[index] = std::move(shader);
    }
    return last_shaders = shaders;
}

VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    if (last_graphics_pipeline && last_graphics_key == key) {
        return *last_graphics_pipeline;
    }
    last_graphics_key = key;

    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
        const auto [program, bindings] = DecompileShaders(key);
        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                     update_descriptor_queue, renderpass_cache,
                                                     key, bindings, program);
    }
    return *(last_graphics_pipeline = entry.get());
}

VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
    auto& entry = pair->second;
    if (!is_cache_miss) {
        return *entry;
    }
    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());

    auto& memory_manager = system.GPU().MemoryManager();
    const auto program_addr = key.shader;
    const auto host_ptr = memory_manager.GetPointer(program_addr);

    auto shader = TryGet(host_ptr);
    if (!shader) {
        // No shader found - create a new one
        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
        ASSERT(cpu_addr);

        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
        constexpr u32 kernel_main_offset = 0;
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                program_addr, *cpu_addr, host_ptr,
                                                std::move(code), kernel_main_offset);
        Register(shader);
    }

    Specialization specialization;
    specialization.workgroup_size = key.workgroup_size;
    specialization.shared_memory_size = key.shared_memory_size;

    const SPIRVShader spirv_shader{
        Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
        shader->GetEntries()};
    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
                                                update_descriptor_queue, spirv_shader);
    return *entry;
}
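
// Removes a shader from the cache together with every graphics and compute pipeline that
// references its GPU address, finishing outstanding GPU work once before erasing entries.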
void VKPipelineCache::Unregister(const Shader& shader) {
    bool finished = false;
    const auto Finish = [&] {
        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline
        // and flush.
        if (finished) {
            return;
        }
        finished = true;
        scheduler.Finish();
    };

    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
        auto& entry = it->first;
        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
            entry.shaders.end()) {
            ++it;
            continue;
        }
        Finish();
        it = graphics_cache.erase(it);
    }

    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
        auto& entry = it->first;
        if (entry.shader != invalidated_addr) {
            ++it;
            continue;
        }
        Finish();
        it = compute_cache.erase(it);
    }

    RasterizerCache::Unregister(shader);
}

std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
    const auto& fixed_state = key.fixed_state;
    auto& memory_manager = system.GPU().MemoryManager();
    const auto& gpu = system.GPU().Maxwell3D();

    Specialization specialization;
    specialization.primitive_topology = fixed_state.input_assembly.topology;
    if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
        ASSERT(fixed_state.input_assembly.point_size != 0.0f);
        specialization.point_size = fixed_state.input_assembly.point_size;
    }
    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
        specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
    }
    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
    specialization.tessellation.primitive = fixed_state.tessellation.primitive;
    specialization.tessellation.spacing = fixed_state.tessellation.spacing;
    specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
    for (const auto& rt : key.renderpass_params.color_attachments) {
        specialization.enabled_rendertargets.set(rt.index);
    }

    SPIRVProgram program;
    std::vector<vk::DescriptorSetLayoutBinding> bindings;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);

        // Skip stages that are not enabled
        if (!gpu.regs.IsShaderConfigEnabled(index)) {
            continue;
        }

        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
        const auto shader = TryGet(host_ptr);
        ASSERT(shader);

        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
        const auto program_type = GetShaderType(program_enum);
        const auto& entries = shader->GetEntries();
        program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
                          entries};

        if (program_enum == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            ++index;
        }

        const u32 old_binding = specialization.base_binding;
        specialization.base_binding =
            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
    }
    return {std::move(program), std::move(bindings)};
}
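
// Appends one descriptor update template entry per resource declared in the shader entries,
// advancing the caller's running binding index and byte offset.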
void FillDescriptorUpdateTemplateEntries(
    const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
    std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
    static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
    const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
        const u32 count = static_cast<u32>(count_);
        if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
            device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
            // Nvidia has a bug where updating multiple uniform texels at once causes the driver
            // to crash.
            for (u32 i = 0; i < count; ++i) {
                template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
                                              offset + i * entry_size, entry_size);
            }
        } else if (count != 0) {
            template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
        }
        offset += count * entry_size;
        binding += count;
    };

    AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
    AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
    AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
    AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
    AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}

} // namespace Vulkan