From efc50485b80e4fde656897b7c9c32f2dbb78977b Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 9 Sep 2023 17:28:06 +0300 Subject: renderer_vulkan: Introduce separate cmd buffer for uploads --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 36 +++++++++++++++++++--- src/video_core/renderer_vulkan/vk_buffer_cache.h | 21 ++++++++++++- .../renderer_vulkan/vk_master_semaphore.cpp | 29 +++++++++++------ .../renderer_vulkan/vk_master_semaphore.h | 14 +++++---- src/video_core/renderer_vulkan/vk_scheduler.cpp | 20 +++++++++--- src/video_core/renderer_vulkan/vk_scheduler.h | 21 ++++++++++--- src/video_core/renderer_vulkan/vk_smaa.cpp | 4 +-- .../renderer_vulkan/vk_staging_buffer_pool.h | 4 +++ 8 files changed, 116 insertions(+), 33 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d8148e89a..7691cc2ba 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase(null_params) {} + : VideoCommon::BufferBase(null_params), tracker{4096} {} Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, buffer{ - CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { + device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, + tracker{SizeBytes()} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } @@ -355,12 +355,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } +void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector& slot_buffers) noexcept { + for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) { + it->ResetUsageTracking(); + } +} + void BufferCacheRuntime::Finish() { scheduler.Finish(); } +bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer, + std::span copies) { + if (Settings::values.disable_buffer_reorder) { + return false; + } + const bool can_use_upload_cmdbuf = + std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) { + return !buffer.IsRegionUsed(copy.dst_offset, copy.size); + }); + return can_use_upload_cmdbuf; +} + void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, - std::span copies, bool barrier) { + std::span copies, bool barrier, + bool can_reorder_upload) { if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { return; } @@ -376,9 +395,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; + // Measuring a popular game, this number never exceeds the specified size once data is warmed up boost::container::small_vector vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); + if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) { + scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies]( + vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { + upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); + return; + } + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { if (barrier) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 95446c732..4416a902f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -5,6 +5,7 @@ #include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/buffer_cache/usage_tracker.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -34,6 +35,18 @@ public: return *buffer; } + [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept { + return tracker.IsUsed(offset, size); + } + + void MarkUsage(u64 offset, u64 size) noexcept { + tracker.Track(offset, size); + } + + void ResetUsageTracking() noexcept { + tracker.Reset(); + } + operator VkBuffer() const noexcept { return *buffer; } @@ -49,6 +62,7 @@ private: const Device* device{}; vk::Buffer buffer; std::vector views; + VideoCommon::UsageTracker tracker; }; class QuadArrayIndexBuffer; @@ -67,6 +81,8 @@ public: ComputePassDescriptorQueue& compute_pass_descriptor_queue, DescriptorPool& descriptor_pool); + void TickFrame(VideoCommon::SlotVector& slot_buffers) noexcept; + void Finish(); u64 GetDeviceLocalMemory() const; @@ -79,12 +95,15 @@ public: [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + bool CanReorderUpload(const Buffer& buffer, std::span copies); + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void PreCopyBarrier(); void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - std::span copies, bool barrier = true); + std::span copies, bool barrier, + bool can_reorder_upload = false); void PostCopyBarrier(); diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 6b288b994..ac8b6e838 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) { Refresh(); } -VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick) { +VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick) { if (semaphore) { - return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, + host_tick); } else { - return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick); } } @@ -115,6 +117,7 @@ static constexpr std::array wait_stage_masks{ }; VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, + vk::CommandBuffer& upload_cmdbuf, VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick) { const VkSemaphore timeline_semaphore = *semaphore; @@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, const std::array signal_values{host_tick, u64(0)}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; + const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const VkTimelineSemaphoreSubmitInfo timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, .waitSemaphoreCount = num_wait_semaphores, .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), + .commandBufferCount = static_cast(cmdbuffers.size()), + .pCommandBuffers = cmdbuffers.data(), .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; @@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, return device.GetGraphicsQueue().Submit(submit_info); } -VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick) { +VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, + vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick) { const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; + const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; + const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = nullptr, .waitSemaphoreCount = num_wait_semaphores, .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), + .commandBufferCount = static_cast(cmdbuffers.size()), + .pCommandBuffers = cmdbuffers.data(), .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = &signal_semaphore, }; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 3f599d7bd..7dfb93ffb 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -52,14 +52,16 @@ public: void Wait(u64 tick); /// Submits the device graphics queue, updating the tick as necessary - VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick); private: - VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); - VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick); + VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick); void WaitThread(std::stop_token token); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3be7837f4..f1a9406ce 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -22,11 +22,12 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_WaitForWorker); -void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { +void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, + vk::CommandBuffer upload_cmdbuf) { auto command = first; while (command != nullptr) { auto next = command->GetNext(); - command->Execute(cmdbuf); + command->Execute(cmdbuf, upload_cmdbuf); command->~Command(); command = next; } @@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) { // Perform the work, tracking whether the chunk was a submission // before executing. const bool has_submit = work->HasSubmit(); - work->ExecuteAll(current_cmdbuf); + work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf); // If the chunk was a submission, reallocate the command buffer. if (has_submit) { @@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() { .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, .pInheritanceInfo = nullptr, }); + current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_upload_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); } u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { @@ -212,7 +220,9 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value, + this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) { + upload_cmdbuf.End(); cmdbuf.End(); if (on_submit) { @@ -221,7 +231,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se std::scoped_lock lock{submit_mutex}; switch (const VkResult result = master_semaphore->SubmitQueue( - cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { + cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index da03803aa..f8d8ca80a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -80,7 +80,8 @@ public: /// Send work to a separate thread. template - void Record(T&& command) { + requires std::is_invocable_v + void RecordWithUploadBuffer(T&& command) { if (chunk->Record(command)) { return; } @@ -88,6 +89,15 @@ public: (void)chunk->Record(command); } + template + requires std::is_invocable_v + void Record(T&& c) { + this->RecordWithUploadBuffer( + [command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + command(cmdbuf); + }); + } + /// Returns the current command buffer tick. [[nodiscard]] u64 CurrentTick() const noexcept { return master_semaphore->CurrentTick(); @@ -119,7 +129,7 @@ private: public: virtual ~Command() = default; - virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; + virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0; Command* GetNext() const { return next; @@ -142,8 +152,8 @@ private: TypedCommand(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete; - void Execute(vk::CommandBuffer cmdbuf) const override { - command(cmdbuf); + void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override { + command(cmdbuf, upload_cmdbuf); } private: @@ -152,7 +162,7 @@ private: class CommandChunk final { public: - void ExecuteAll(vk::CommandBuffer cmdbuf); + void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf); template bool Record(T& command) { @@ -228,6 +238,7 @@ private: VideoCommon::QueryCacheBase* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; + vk::CommandBuffer current_upload_cmdbuf; std::unique_ptr chunk; std::function on_submit; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 5efd7d66e..70644ea82 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) { UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); - scheduler.Record([&](vk::CommandBuffer& cmdbuf) { + scheduler.Record([&](vk::CommandBuffer cmdbuf) { for (auto& images : m_dynamic_images) { for (size_t i = 0; i < MaxDynamicImage; i++) { ClearColorImage(cmdbuf, *images.images[i]); @@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ UpdateDescriptorSets(source_image_view, image_index); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { + scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d3deb9072..f63a20327 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -36,6 +36,10 @@ public: StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); void FreeDeferred(StagingBufferRef& ref); + [[nodiscard]] VkBuffer StreamBuf() const noexcept { + return *stream_buffer; + } + void TickFrame(); private: -- cgit v1.2.3