From d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 13 Jun 2021 15:47:54 +0200 Subject: Reaper: Tune it up to be a smart GC. --- src/video_core/buffer_cache/buffer_cache.h | 27 +++++++-- src/video_core/texture_cache/image_base.cpp | 20 +++++++ src/video_core/texture_cache/image_base.h | 10 ++++ src/video_core/texture_cache/texture_cache.h | 84 +++++++++++++++++++++++++--- src/video_core/texture_cache/util.cpp | 2 + 5 files changed, 130 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ecb7d3dee..b4fa85c5b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -65,6 +65,9 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; + static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL; + static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL; + using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Runtime = typename P::Runtime; @@ -327,6 +330,7 @@ private: typename SlotVector::Iterator deletion_iterator; u64 frame_tick = 0; + u64 total_used_memory = 0; std::array> PAGE_BITS)> page_table; }; @@ -346,6 +350,10 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, template void BufferCache

::TickFrame() { + SCOPE_EXIT({ + ++frame_tick; + delayed_destruction_ring.Tick(); + }); // Calculate hits and shots and move hit bits to the right const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); @@ -359,8 +367,13 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - static constexpr u64 ticks_to_destroy = 120; - int num_iterations = 32; + const bool activate_gc = total_used_memory >= expected_memory; + if (!activate_gc) { + return; + } + const bool agressive_gc = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = agressive_gc ? 60 : 120; + int num_iterations = agressive_gc ? 64 : 32; for (; num_iterations > 0; --num_iterations) { if (deletion_iterator == slot_buffers.end()) { deletion_iterator = slot_buffers.begin(); @@ -375,8 +388,6 @@ void BufferCache

::TickFrame() { DeleteBuffer(buffer_id); } } - delayed_destruction_ring.Tick(); - ++frame_tick; } template @@ -1115,8 +1126,14 @@ template template void BufferCache

::ChangeRegister(BufferId buffer_id) { const Buffer& buffer = slot_buffers[buffer_id]; + const auto size = buffer.SizeBytes(); + if (insert) { + total_used_memory += Common::AlignUp(size, 1024); + } else { + total_used_memory -= Common::AlignUp(size, 1024); + } const VAddr cpu_addr_begin = buffer.CpuAddr(); - const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); + const VAddr cpu_addr_end = cpu_addr_begin + size; const u64 page_begin = cpu_addr_begin / PAGE_SIZE; const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); for (u64 page = page_begin; page != page_end; ++page) { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index bd0e7e64e..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -130,6 +130,26 @@ bool ImageBase::IsSafeDownload() const noexcept { return true; } +void ImageBase::CheckBadOverlapState() { + if (False(flags & ImageFlagBits::BadOverlap)) { + return; + } + if (!overlapping_images.empty()) { + return; + } + flags &= ~ImageFlagBits::BadOverlap; +} + +void ImageBase::CheckAliasState() { + if (False(flags & ImageFlagBits::Alias)) { + return; + } + if (!aliased_images.empty()) { + return; + } + flags &= ~ImageFlagBits::Alias; +} + void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; ASSERT(lhs.info.type == rhs.info.type); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0f69d8a32..40c047ea1 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + + 
// Garbage Collection Flags + BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 9, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -51,6 +57,9 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + void CheckBadOverlapState(); + void CheckAliasState(); + ImageInfo info; u32 guest_size_bytes = 0; @@ -74,6 +83,7 @@ struct ImageBase { std::vector slice_subresources; std::vector aliased_images; + std::vector overlapping_images; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 45ef155b5..cf48f7b02 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -75,6 +75,9 @@ class TextureCache { /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; + static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL; + using Runtime = typename P::Runtime; using Image = typename P::Image; using ImageAlloc = typename P::ImageAlloc; @@ -333,6 +336,7 @@ private: std::unordered_map, IdentityHash> page_table; bool has_deleted_images = false; + u64 total_used_memory = 0; SlotVector slot_images; SlotVector slot_image_views; @@ -380,8 +384,10 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template void TextureCache

::TickFrame() { - static constexpr u64 ticks_to_destroy = 120; - int num_iterations = 32; + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); for (; num_iterations > 0; --num_iterations) { if (deletion_iterator == slot_images.end()) { deletion_iterator = slot_images.begin(); @@ -390,11 +396,42 @@ void TextureCache

::TickFrame() { } } const auto [image_id, image] = *deletion_iterator; - if (image->frame_tick + ticks_to_destroy < frame_tick) { - if (image->IsSafeDownload() && - std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { - return slot_images[alias.id].modification_tick > image->modification_tick; - })) { + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + if (is_alias && image->aliased_images.size() <= 1) { + ++deletion_iterator; + continue; + } + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + const u64 ticks_needed = is_bad_overlap ? ticks_to_destroy >> 4 : ticks_to_destroy; + const bool should_care = + aggressive_mode || is_bad_overlap || is_alias || (high_priority_mode && !must_download); + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = + std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return (overlap.frame_tick >= image->frame_tick) && + (overlap.modification_tick > image->modification_tick); + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + if (is_alias) { + const bool alias_check = + std::ranges::all_of(image->aliased_images, [&](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick >= image->frame_tick) && + (alias_image.modification_tick > image->modification_tick); + }); + if (!alias_check) { + ++deletion_iterator; + continue; + } + } auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); const auto copies = FullDownloadCopies(image->info); image->DownloadMemory(map, copies); @@ -406,10 +443,12 @@ void TextureCache

::TickFrame() { } UnregisterImage(image_id); DeleteImage(image_id); + if (is_bad_overlap) { + num_iterations++; + } } ++deletion_iterator; } - // Tick sentenced resources in this order to ensure they are destroyed in the right order sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -989,6 +1028,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector overlap_ids; std::vector left_aliased_ids; std::vector right_aliased_ids; + std::vector bad_overlap_ids; ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { if (info.type != overlap.info.type) { return; @@ -1014,9 +1054,14 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; } }); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); @@ -1044,10 +1089,18 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId aliased_id : right_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; } for (const ImageId aliased_id : left_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; } RegisterImage(new_image_id); return new_image_id; @@ -1217,6 +1270,8 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); + total_used_memory += + Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); } template @@ -1225,6 +1280,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + total_used_memory -= + Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { @@ -1298,9 +1356,19 @@ void TextureCache

::DeleteImage(ImageId image_id) { std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { return other_alias.id == image_id; }); + other_image.CheckAliasState(); ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", num_removed_aliases); } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } for (const ImageViewId image_view_id : image_view_ids) { sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); slot_image_views.erase(image_view_id); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0d3e0804f..9680167ee 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span src = input.subspan(host_offset); + gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, num_tiles.depth, block.height, block.depth); -- cgit v1.2.3