From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: shader: Accelerate pipeline transitions and use dirty flags for shaders --- src/video_core/dirty_flags.cpp | 6 +++ src/video_core/dirty_flags.h | 2 + src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_state_tracker.cpp | 6 --- src/video_core/renderer_opengl/gl_state_tracker.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++---------- 9 files changed, 114 insertions(+), 64 deletions(-) diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3551dbdcc..dd1937863 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3