diff options
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- | src/video_core/texture_cache/descriptor_table.h | 1 | ||||
-rw-r--r-- | src/video_core/texture_cache/format_lookup_table.cpp | 6 | ||||
-rw-r--r-- | src/video_core/texture_cache/formatter.h | 4 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.h | 7 | ||||
-rw-r--r-- | src/video_core/texture_cache/render_targets.h | 1 | ||||
-rw-r--r-- | src/video_core/texture_cache/slot_vector.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 82 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.h | 2 |
9 files changed, 72 insertions, 41 deletions
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h index 3a03b786f..318bd5214 100644 --- a/src/video_core/texture_cache/descriptor_table.h +++ b/src/video_core/texture_cache/descriptor_table.h @@ -9,7 +9,6 @@ #include "common/common_types.h" #include "common/div_ceil.h" -#include "common/logging/log.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index afa807d5d..20e64a7c2 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -63,6 +63,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::A1B5G5R5_UNORM; case Hash(TextureFormat::A4B4G4R4, UNORM): return PixelFormat::A4B4G4R4_UNORM; + case Hash(TextureFormat::G4R4, UNORM): + return PixelFormat::R4G4_UNORM; + case Hash(TextureFormat::A5B5G5R1, UNORM): + return PixelFormat::A5B5G5R1_UNORM; case Hash(TextureFormat::R8, UNORM): return PixelFormat::R8_UNORM; case Hash(TextureFormat::R8, SNORM): @@ -143,6 +147,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::S8_UINT_D24_UNORM; case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR): + return PixelFormat::D24_UNORM_S8_UINT; case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): return PixelFormat::D32_FLOAT_S8_UINT; case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index b2c81057b..6f5afc5a9 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -38,6 +38,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "A2B10G10R10_UINT"; case PixelFormat::A1B5G5R5_UNORM: return "A1B5G5R5_UNORM"; + case PixelFormat::A5B5G5R1_UNORM: + return "A5B5G5R1_UNORM"; case PixelFormat::R8_UNORM: return "R8_UNORM"; case PixelFormat::R8_SNORM: @@ -152,6 +154,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "BC7_SRGB"; case PixelFormat::A4B4G4R4_UNORM: return "A4B4G4R4_UNORM"; + case PixelFormat::R4G4_UNORM: + return "R4G4_UNORM"; case PixelFormat::ASTC_2D_4X4_SRGB: return "ASTC_2D_4X4_SRGB"; case PixelFormat::ASTC_2D_8X8_SRGB: diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..dd0106432 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -33,11 +33,12 @@ enum class ImageFlagBits : u32 { ///< garbage collection priority Alias = 1 << 11, ///< This image has aliases and has priority on garbage ///< collection + CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back. // Rescaler - Rescaled = 1 << 12, - CheckingRescalable = 1 << 13, - IsRescalable = 1 << 14, + Rescaled = 1 << 13, + CheckingRescalable = 1 << 14, + IsRescalable = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h index 0cb227d69..f8f13e84c 100644 --- a/src/video_core/texture_cache/render_targets.h +++ b/src/video_core/texture_cache/render_targets.h @@ -6,7 +6,6 @@ #include <algorithm> #include <span> -#include <utility> #include "common/bit_cast.h" #include "video_core/texture_cache/types.h" diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 50df06409..6aabaef7b 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -5,9 +5,7 @@ #pragma once #include <algorithm> -#include <array> #include <bit> -#include <concepts> #include <numeric> #include <type_traits> #include <utility> diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 198bb0cfb..8fef74117 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 4) / 10; - const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); - minimum_memory = 0; + const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); + const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; + const s64 min_spacing_critical = device_memory - 1_GiB; + const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (3 * mem_threshold) / 10; + expected_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_EXPECTED_MEMORY)); + critical_memory = static_cast<u64>( + std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_MEMORY)); + minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); } else { - // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; @@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template <class P> void TextureCache<P>::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); - const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + bool high_priority_mode = total_used_memory >= expected_memory; + bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); + const auto clean_up = [this, &num_iterations, &high_priority_mode, + &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } --num_iterations; auto& image = slot_images[image_id]; - const bool must_download = image.IsSafeDownload(); - if (!high_priority_mode && must_download) { + const bool must_download = + image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); + if (!high_priority_mode && + (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { return false; } if (must_download) { @@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() { } UnregisterImage(image_id); DeleteImage(image_id, image.scale_tick > frame_tick + 5); + if (total_used_memory < critical_memory) { + if (aggressive_mode) { + // Sink the aggresiveness. + num_iterations >>= 2; + aggressive_mode = false; + return false; + } + if (high_priority_mode && total_used_memory < expected_memory) { + num_iterations >>= 1; + high_priority_mode = false; + } + } return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() { template <class P> void TextureCache<P>::TickFrame() { + // If we can obtain the memory info, use it instead of the estimate. + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } if (total_used_memory > minimum_memory) { RunGarbageCollector(); } @@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() { sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); runtime.TickFrame(); + critical_gc = 0; ++frame_tick; } @@ -343,7 +369,7 @@ template <bool has_blacklists> void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, std::span<ImageViewInOut> views) { - bool has_blacklisted; + bool has_blacklisted = false; do { has_deleted_images = false; if constexpr (has_blacklists) { @@ -1052,6 +1078,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + new_image.flags |= ImageFlagBits::GpuModified; + } if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { @@ -1414,6 +1443,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { + RunGarbageCollector(); + critical_gc++; + } image.lru_index = lru_cache.Insert(image_id, frame_tick); ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, @@ -1704,6 +1737,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); + if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { + image.flags |= ImageFlagBits::GpuModified; + } } } if (aliased_images.empty()) { @@ -1725,7 +1761,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { }); const auto& resolution = Settings::values.resolution_info; for (const AliasedImage* const aliased : aliased_images) { - if (!resolution.active | !any_rescaled) { + if (!resolution.active || !any_rescaled) { CopyImage(image_id, aliased->id, aliased->copies); continue; } @@ -1736,19 +1772,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { continue; } ScaleUp(aliased_image); - - const bool both_2d{image.info.type == ImageType::e2D && - aliased_image.info.type == ImageType::e2D}; - auto copies = aliased->copies; - for (auto copy : copies) { - copy.extent.width = std::max<u32>( - (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1); - if (both_2d) { - copy.extent.height = std::max<u32>( - (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1); - } - } - CopyImage(image_id, aliased->id, copies); + CopyImage(image_id, aliased->id, aliased->copies); } } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 7107887a6..b1324edf3 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -22,7 +22,6 @@ #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" -#include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -60,8 +59,10 @@ class TextureCache { /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; - static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + static constexpr s64 TARGET_THRESHOLD = 4_GiB; + static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; + static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; + static constexpr size_t GC_EMERGENCY_COUNTS = 2; using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -373,6 +374,7 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; + size_t critical_gc; SlotVector<Image> slot_images; SlotVector<ImageMapView> slot_map_views; diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 7af52de2e..f13669ea5 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -9,10 +9,8 @@ #include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/texture_cache/image_base.h" -#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" |