From 218dedca1f8572bc0e43f8e7ea577f4ece28c4c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:00:38 -0300 Subject: gl_graphics_pipeline: Port optimizations from Vulkan pipelines --- .../renderer_opengl/gl_graphics_pipeline.cpp | 180 ++++++++++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 18 ++- 2 files changed, 141 insertions(+), 57 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,6 +15,12 @@ #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; @@ -98,13 +104,76 @@ std::pair TransformFeedbackEnum(u8 location) { return {GL_POSITION, 0}; } -struct Spec { +template +bool Passes(const std::array& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(stage_infos, enabled_mask)) { + return FindSpec(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { static constexpr std::array enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; static constexpr bool has_texture_buffers = true; static constexpr bool has_image_buffers = true; static constexpr bool has_images = true; }; + +ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 enabled_mask) { + return FindSpec(infos, enabled_mask); +} } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } else { AttachShader(Stage(stage), program.handle, code); } @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (!device.UseAssemblyShaders()) { LinkProgram(program.handle); } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += - AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += - AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } if (shader_notify) { shader_notify->MarkShaderComplete(); } @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -void GraphicsPipeline::Configure(bool is_indexed) { +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { } } +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -75,7 +75,9 @@ public: const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); - void Configure(bool is_indexed); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } void ConfigureTransformFeedback() const { if (num_xfb_attribs != 0) { @@ -91,11 +93,21 @@ public: return is_built.load(std::memory_order::relaxed); } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + template + void ConfigureImpl(bool is_indexed); void ConfigureTransformFeedbackImpl() const; + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -103,6 +115,8 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + void (*configure_func)(GraphicsPipeline*, bool){}; + OGLProgram program; std::array assembly_programs; u32 enabled_stages_mask{}; -- cgit v1.2.3