From 3fbee093b2bf3b4c15dbc5bb48a3bc768ecedbc9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 Mar 2023 21:43:31 +0100 Subject: TextureCache: refactor DMA downloads to allow multiple buffers. --- src/video_core/texture_cache/texture_cache.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ed5c768d8..2cd5aa31e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -833,6 +833,16 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm return {image, copy}; } +template +void TextureCache

::DownloadImageIntoBuffer( + typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, + size_t buffer_offset, std::span copies) { + std::array buffers{ + buffer, + }; + image->DownloadMemory(buffers, buffer_offset, copies); +} + template void TextureCache

::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { -- cgit v1.2.3 From e3a2ca96bd2350471ebb6c2907c67b10254a4f7e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Apr 2023 18:07:38 +0200 Subject: Accelerate DMA: Use texture cache async downloads to perform the copies to host. WIP --- src/video_core/texture_cache/texture_cache.h | 118 +++++++++++++++++++-------- 1 file changed, 84 insertions(+), 34 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cd5aa31e..63b8b5af5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -661,27 +661,40 @@ template void TextureCache

::CommitAsyncFlushes() { // This is intentionally passing the value by copy if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span download_ids = uncommitted_downloads; + auto& download_ids = uncommitted_downloads; if (download_ids.empty()) { committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); - async_buffers.emplace_back(std::optional{}); + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + size_t last_async_buffer_id = uncommitted_async_buffers.size(); + bool any_none_dma = false; + for (PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + any_none_dma = true; + download_info.async_buffer_id = last_async_buffer_id; + } } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + if (any_none_dma) { + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + Image& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + } + uncommitted_async_buffers.emplace_back(download_map); } - async_buffers.emplace_back(download_map); } committed_downloads.emplace_back(std::move(uncommitted_downloads)); + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); uncommitted_downloads.clear(); } @@ -691,39 +704,57 @@ void TextureCache

::PopAsyncFlushes() { return; } if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); async_buffers.pop_front(); return; } - auto download_map = *async_buffers.front(); - std::span download_span = download_map.mapped_span; + auto download_map = std::move(async_buffers.front()); for (size_t i = download_ids.size(); i > 0; i--) { - const ImageBase& image = slot_images[download_ids[i - 1]]; - const auto copies = FullDownloadCopies(image.info); - download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); - std::span download_span_alt = download_span.subspan(download_map.offset); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, - swizzle_data_buffer); + auto& download_info = download_ids[i - 1]; + auto& download_buffer = download_map[download_info.async_buffer_id]; + if (download_info.is_swizzle) { + const ImageBase& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); + std::span download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); + } else { + const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; + std::span download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), + buffer_info.size); + slot_buffer_downloads.erase(download_info.object_id); + } + } + for (auto& download_buffer : download_map) { + runtime.FreeDeferredStagingBuffer(download_buffer); } - runtime.FreeDeferredStagingBuffer(download_map); committed_downloads.pop_front(); async_buffers.pop_front(); } else { - const std::span download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + } } auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + continue; + } + Image& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(download_map, copies); download_map.offset += image.unswizzled_size_bytes; @@ -732,8 +763,11 @@ void TextureCache

::PopAsyncFlushes() { runtime.Finish(); download_map.offset = original_offset; std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + continue; + } + const ImageBase& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, swizzle_data_buffer); @@ -836,11 +870,27 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm template void TextureCache

::DownloadImageIntoBuffer( typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, - size_t buffer_offset, std::span copies) { - std::array buffers{ - buffer, - }; - image->DownloadMemory(buffers, buffer_offset, copies); + size_t buffer_offset, std::span copies, GPUVAddr address, size_t size) { + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + auto slot = slot_buffer_downloads.insert(address, size); + uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot); + auto download_map = runtime.DownloadStagingBuffer(size, true); + uncommitted_async_buffers.emplace_back(download_map); + std::array buffers{ + buffer, + download_map.buffer, + }; + std::array buffer_offsets{ + buffer_offset, + download_map.offset, + }; + image->DownloadMemory(buffers, buffer_offsets, copies); + } else { + std::array buffers{ + buffer, + }; + image->DownloadMemory(buffers, buffer_offset, copies); + } } template @@ -2219,7 +2269,7 @@ void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); + uncommitted_downloads.emplace_back(true, 0, old_view.image_id); } } *old_id = new_id; -- cgit v1.2.3 From 58d1c7c77af1a68efa363c322fc5b26bff37ef00 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 22 Apr 2023 15:27:58 +0200 Subject: Address Feedback & Clang Format --- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 63b8b5af5..543ceb5aa 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "common/alignment.h" #include "common/settings.h" @@ -17,15 +18,10 @@ namespace VideoCommon { -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::SurfaceType; using namespace Common::Literals; @@ -674,7 +670,8 @@ void TextureCache

::CommitAsyncFlushes() { bool any_none_dma = false; for (PendingDownload& download_info : download_ids) { if (download_info.is_swizzle) { - total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + total_size_bytes += + Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); any_none_dma = true; download_info.async_buffer_id = last_async_buffer_id; } @@ -868,12 +865,16 @@ std::pair::Image*, BufferImageCopy> TextureCache

::Dm } template -void TextureCache

::DownloadImageIntoBuffer( - typename TextureCache

::Image* image, typename TextureCache

::BufferType buffer, - size_t buffer_offset, std::span copies, GPUVAddr address, size_t size) { +void TextureCache

::DownloadImageIntoBuffer(typename TextureCache

::Image* image, + typename TextureCache

::BufferType buffer, + size_t buffer_offset, + std::span copies, + GPUVAddr address, size_t size) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - auto slot = slot_buffer_downloads.insert(address, size); - uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot); + const BufferDownload new_buffer_download{address, size}; + auto slot = slot_buffer_downloads.insert(new_buffer_download); + const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; + uncommitted_downloads.emplace_back(new_download); auto download_map = runtime.DownloadStagingBuffer(size, true); uncommitted_async_buffers.emplace_back(download_map); std::array buffers{ @@ -2269,7 +2270,8 @@ void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.emplace_back(true, 0, old_view.image_id); + const PendingDownload new_download{true, 0, old_view.image_id}; + uncommitted_downloads.emplace_back(new_download); } } *old_id = new_id; -- cgit v1.2.3 From 4bc5469f52157cd18e697120df40e40e32365e89 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 23 Apr 2023 21:37:13 +0200 Subject: Texture Cache: Release stagging buffers on tick frame --- src/video_core/texture_cache/texture_cache.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 543ceb5aa..e601f8446 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -139,6 +139,13 @@ void TextureCache

::TickFrame() { runtime.TickFrame(); critical_gc = 0; ++frame_tick; + + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + for (auto& buffer : async_buffers_death_ring) { + runtime.FreeDeferredStagingBuffer(buffer); + } + async_buffers_death_ring.clear(); + } } template @@ -688,10 +695,10 @@ void TextureCache

::CommitAsyncFlushes() { } uncommitted_async_buffers.emplace_back(download_map); } + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); } committed_downloads.emplace_back(std::move(uncommitted_downloads)); - async_buffers.emplace_back(std::move(uncommitted_async_buffers)); - uncommitted_async_buffers.clear(); uncommitted_downloads.clear(); } @@ -729,7 +736,7 @@ void TextureCache

::PopAsyncFlushes() { } } for (auto& download_buffer : download_map) { - runtime.FreeDeferredStagingBuffer(download_buffer); + async_buffers_death_ring.emplace_back(download_buffer); } committed_downloads.pop_front(); async_buffers.pop_front(); @@ -748,7 +755,7 @@ void TextureCache

::PopAsyncFlushes() { auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); const size_t original_offset = download_map.offset; for (const PendingDownload& download_info : download_ids) { - if (download_info.is_swizzle) { + if (!download_info.is_swizzle) { continue; } Image& image = slot_images[download_info.object_id]; @@ -761,7 +768,7 @@ void TextureCache

::PopAsyncFlushes() { download_map.offset = original_offset; std::span download_span = download_map.mapped_span; for (const PendingDownload& download_info : download_ids) { - if (download_info.is_swizzle) { + if (!download_info.is_swizzle) { continue; } const ImageBase& image = slot_images[download_info.object_id]; @@ -887,10 +894,7 @@ void TextureCache

::DownloadImageIntoBuffer(typename TextureCache

::Image* i }; image->DownloadMemory(buffers, buffer_offsets, copies); } else { - std::array buffers{ - buffer, - }; - image->DownloadMemory(buffers, buffer_offset, copies); + image->DownloadMemory(buffer, buffer_offset, copies); } } -- cgit v1.2.3