summaryrefslogtreecommitdiffstats
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r--src/video_core/texture_cache/image_base.cpp37
-rw-r--r--src/video_core/texture_cache/image_base.h12
-rw-r--r--src/video_core/texture_cache/slot_vector.h70
-rw-r--r--src/video_core/texture_cache/texture_cache.h151
-rw-r--r--src/video_core/texture_cache/util.cpp2
5 files changed, 259 insertions, 13 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 9914926b3..ad69d32d1 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
image_view_ids.push_back(image_view_id);
}
+bool ImageBase::IsSafeDownload() const noexcept {
+    // A download is only considered for images the GPU has modified and the CPU
+    // has not: images without GPU writes are skipped, and images modified from
+    // the CPU are skipped so guest-written data is not overwritten.
+    const bool gpu_untouched = False(flags & ImageFlagBits::GpuModified);
+    if (gpu_untouched || True(flags & ImageFlagBits::CpuModified)) {
+        return false;
+    }
+    // Multisampled images are refused with a warning.
+    const bool is_multisampled = info.num_samples > 1;
+    if (is_multisampled) {
+        LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+        return false;
+    }
+    return true;
+}
+
+void ImageBase::CheckBadOverlapState() {
+    // Nothing to do when the flag is not set or overlapping images are still recorded.
+    if (False(flags & ImageFlagBits::BadOverlap) || !overlapping_images.empty()) {
+        return;
+    }
+    // The last overlapping image is gone: drop the BadOverlap flag.
+    flags &= ~ImageFlagBits::BadOverlap;
+}
+
+void ImageBase::CheckAliasState() {
+    // Nothing to do when the flag is not set or aliased images are still recorded.
+    if (False(flags & ImageFlagBits::Alias) || !aliased_images.empty()) {
+        return;
+    }
+    // The last alias is gone: drop the Alias flag.
+    flags &= ~ImageFlagBits::Alias;
+}
+
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index b7f3b7e43..e326cab71 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+
+ // Garbage Collection Flags
+ BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
+ ///< garbage collection priority
+ Alias = 1 << 9, ///< This image has aliases and has priority on garbage
+ ///< collection
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -44,11 +50,16 @@ struct ImageBase {
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+ [[nodiscard]] bool IsSafeDownload() const noexcept;
+
/// Returns true when the range [overlap_cpu_addr, overlap_cpu_addr + overlap_size)
/// intersects this image's range [cpu_addr, cpu_addr_end). Both ends are compared
/// strictly, so ranges that merely touch do not count as overlapping.
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
+ void CheckBadOverlapState();
+ void CheckAliasState();
+
ImageInfo info;
u32 guest_size_bytes = 0;
@@ -72,6 +83,7 @@ struct ImageBase {
std::vector<SubresourceBase> slice_subresources;
std::vector<AliasedImage> aliased_images;
+ std::vector<ImageId> overlapping_images;
};
struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index eae3be6ea..6180b8c0e 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <bit>
#include <concepts>
#include <numeric>
#include <type_traits>
@@ -32,6 +33,60 @@ template <class T>
requires std::is_nothrow_move_assignable_v<T>&&
std::is_nothrow_move_constructible_v<T> class SlotVector {
public:
+ /// Forward iterator over the stored elements of a SlotVector.
+ /// Positions are identified by SlotId; the end position uses SlotId::INVALID_INDEX.
+ class Iterator {
+     friend SlotVector<T>;
+
+ public:
+     /// Creates an iterator that is not bound to any SlotVector.
+     /// Such an iterator may be assigned to and compared, but must not be
+     /// incremented or dereferenced.
+     constexpr Iterator() = default;
+
+     /// Advances to the next slot whose bit is set in the owner's stored bitset.
+     /// When no further slot is stored the index becomes SlotId::INVALID_INDEX.
+     /// Does nothing when the current index is already past the bitset range.
+     Iterator& operator++() noexcept {
+         const u64* const bitset = slot_vector->stored_bitset.data();
+         // Total number of slots addressable by the bitset (64 per word)
+         const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
+         if (id.index < size) {
+             do {
+                 ++id.index;
+             } while (id.index < size && !IsValid(bitset));
+             if (id.index == size) {
+                 id.index = SlotId::INVALID_INDEX;
+             }
+         }
+         return *this;
+     }
+
+     /// Post-increment: advances and returns the previous position.
+     Iterator operator++(int) noexcept {
+         const Iterator copy{*this};
+         ++*this;
+         return copy;
+     }
+
+     /// Iterators compare equal when their slot indices match; the owning
+     /// SlotVector is not part of the comparison.
+     bool operator==(const Iterator& other) const noexcept {
+         return id.index == other.id.index;
+     }
+
+     bool operator!=(const Iterator& other) const noexcept {
+         return id.index != other.id.index;
+     }
+
+     /// Returns the current slot id together with a pointer to its element.
+     std::pair<SlotId, T*> operator*() const noexcept {
+         return {id, std::addressof((*slot_vector)[id])};
+     }
+
+     /// Returns a pointer to the element at the current slot.
+     T* operator->() const noexcept {
+         return std::addressof((*slot_vector)[id]);
+     }
+
+ private:
+     Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
+         : slot_vector{slot_vector_}, id{id_} {}
+
+     /// True when the bit for the current index is set in the given bitset words.
+     bool IsValid(const u64* bitset) const noexcept {
+         return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
+     }
+
+     // Initialized to nullptr so a default-constructed iterator never holds an
+     // indeterminate pointer value.
+     SlotVector<T>* slot_vector = nullptr;
+     SlotId id;
+ };
+
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
@@ -70,6 +125,20 @@ public:
ResetStorageBit(id.index);
}
+ /// Returns an iterator to the first stored element, or end() when the stored
+ /// bitset has no bit set.
+ [[nodiscard]] Iterator begin() noexcept {
+     // Locate the first 64-bit word of the bitset that has any bit set
+     const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
+     if (it == stored_bitset.end()) {
+         return end();
+     }
+     // std::distance takes (first, last): measure from the start of the bitset to
+     // the found word. The reversed order produced a negative distance for any
+     // word other than the first, which wrapped into a wrong slot index.
+     const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
+     // The lowest set bit inside that word selects the slot
+     const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
+     return Iterator(this, first_id);
+ }
+
+ /// Returns the past-the-end iterator, identified by SlotId::INVALID_INDEX.
+ [[nodiscard]] Iterator end() noexcept {
+     const SlotId sentinel{SlotId::INVALID_INDEX};
+     return Iterator(this, sentinel);
+ }
+
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
@@ -140,7 +209,6 @@ private:
Entry* values = nullptr;
size_t values_capacity = 0;
- size_t values_size = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 59b7c678b..e7f8478b4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -20,8 +20,10 @@
#include "common/alignment.h"
#include "common/common_funcs.h"
+#include "common/common_sizes.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
@@ -69,12 +71,17 @@ class TextureCache {
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+ /// True when the API can provide info about the memory of the device.
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
+ static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB;
+ static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB;
+
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
@@ -103,6 +110,9 @@ public:
/// Notify the cache that a new frame has been queued
void TickFrame();
+ /// Runs the Garbage Collector.
+ void RunGarbageCollector();
+
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
@@ -333,6 +343,10 @@ private:
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
bool has_deleted_images = false;
+ u64 total_used_memory = 0;
+ u64 minimum_memory;
+ u64 expected_memory;
+ u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageView> slot_image_views;
@@ -353,6 +367,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+ typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
@@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// This way the null resource becomes a compile time constant
void(slot_image_views.insert(runtime, NullImageParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
+
+ deletion_iterator = slot_images.begin();
+
+ if constexpr (HAS_DEVICE_MEMORY_INFO) {
+ const auto device_memory = runtime.GetDeviceLocalMemory();
+ const u64 possible_expected_memory = (device_memory * 3) / 10;
+ const u64 possible_critical_memory = (device_memory * 6) / 10;
+ expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
+ critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
+ minimum_memory = 0;
+ } else {
+ // On OGL we can be more conservative, as the driver takes care.
+ expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB;
+ critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB;
+ minimum_memory = expected_memory;
+ }
+}
+
+template <class P>
+void TextureCache<P>::RunGarbageCollector() {
+    // Memory pressure decides how many slots one pass visits and how many frame
+    // ticks an image must have been left behind before it is destroyed.
+    const bool aggressive_mode = total_used_memory >= critical_memory;
+    const bool high_priority_mode = total_used_memory >= expected_memory;
+    const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
+    int num_iterations = 64;
+    if (aggressive_mode) {
+        num_iterations = 256;
+    } else if (high_priority_mode) {
+        num_iterations = 128;
+    }
+    for (; num_iterations > 0; --num_iterations) {
+        // Wrap around to the first image; stop when there are no images at all.
+        if (deletion_iterator == slot_images.end()) {
+            deletion_iterator = slot_images.begin();
+            if (deletion_iterator == slot_images.end()) {
+                break;
+            }
+        }
+        // Plain locals instead of a structured binding so the lambdas below can capture them.
+        const auto entry = *deletion_iterator;
+        const auto image_id = entry.first;
+        Image* const image = entry.second;
+
+        const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
+        const bool is_alias = True(image->flags & ImageFlagBits::Alias);
+        const bool must_download = image->IsSafeDownload();
+
+        // Bad overlaps, aliases and (under memory pressure) images that need no
+        // download are prioritized for collection.
+        const bool prioritized =
+            is_bad_overlap || is_alias || (high_priority_mode && !must_download);
+        u64 ticks_needed = ticks_to_destroy;
+        if (is_bad_overlap) {
+            ticks_needed = ticks_to_destroy >> 4;
+        } else if (prioritized && aggressive_mode) {
+            ticks_needed = ticks_to_destroy >> 1;
+        }
+        // In aggressive mode every image is a candidate.
+        const bool should_care = prioritized || aggressive_mode;
+        const bool expired = image->frame_tick + ticks_needed < frame_tick;
+        if (!should_care || !expired) {
+            ++deletion_iterator;
+            continue;
+        }
+
+        if (is_bad_overlap) {
+            // Keep a badly overlapping image while any image it overlaps has an
+            // older frame_tick than its own.
+            const bool has_older_overlap = std::ranges::any_of(
+                image->overlapping_images, [&, image](const ImageId& overlap_id) {
+                    return slot_images[overlap_id].frame_tick < image->frame_tick;
+                });
+            if (has_older_overlap) {
+                ++deletion_iterator;
+                continue;
+            }
+        }
+
+        if (must_download && !is_bad_overlap) {
+            // Only download when no alias is older than this image, either in
+            // frame_tick or in modification_tick.
+            const bool aliases_up_to_date = std::ranges::all_of(
+                image->aliased_images, [&, image](const AliasedImage& alias) {
+                    auto& alias_image = slot_images[alias.id];
+                    return alias_image.frame_tick >= image->frame_tick &&
+                           alias_image.modification_tick >= image->modification_tick;
+                });
+            if (aliases_up_to_date) {
+                auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
+                const auto copies = FullDownloadCopies(image->info);
+                image->DownloadMemory(map, copies);
+                runtime.Finish();
+                SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
+            }
+        }
+
+        if (True(image->flags & ImageFlagBits::Tracked)) {
+            UntrackImage(*image);
+        }
+        UnregisterImage(image_id);
+        DeleteImage(image_id);
+        // Removing a bad overlap does not consume one of this pass's iterations.
+        if (is_bad_overlap) {
+            ++num_iterations;
+        }
+        ++deletion_iterator;
+    }
+}
template <class P>
void TextureCache<P>::TickFrame() {
- // Tick sentenced resources in this order to ensure they are destroyed in the right order
+ if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
+ RunGarbageCollector();
+ }
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
@@ -568,17 +666,7 @@ template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
- // Skip images that were not modified from the GPU
- if (False(image.flags & ImageFlagBits::GpuModified)) {
- return;
- }
- // Skip images that .are. modified from the CPU
- // We don't want to write sensitive data from the guest
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- if (image.info.num_samples > 1) {
- LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
@@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
std::vector<ImageId> overlap_ids;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
+ std::vector<ImageId> bad_overlap_ids;
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
if (info.type != overlap.info.type) {
return;
@@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
+ } else {
+ bad_overlap_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::BadOverlap;
}
});
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
@@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+ new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+ new_image.flags |= ImageFlagBits::Alias;
+ }
+ for (const ImageId aliased_id : bad_overlap_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ aliased.overlapping_images.push_back(new_image_id);
+ new_image.overlapping_images.push_back(aliased_id);
+ new_image.flags |= ImageFlagBits::BadOverlap;
}
RegisterImage(new_image_id);
return new_image_id;
@@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory += Common::AlignUp(tentative_size, 1024);
}
template <class P>
@@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
+ image.flags &= ~ImageFlagBits::BadOverlap;
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory -= Common::AlignUp(tentative_size, 1024);
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
@@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
return other_alias.id == image_id;
});
+ other_image.CheckAliasState();
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
num_removed_aliases);
}
+ for (const ImageId overlap_id : image.overlapping_images) {
+ ImageBase& other_image = slot_images[overlap_id];
+ [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
+ other_image.overlapping_images,
+ [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
+ other_image.CheckBadOverlapState();
+ ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
+ num_removed_overlaps);
+ }
for (const ImageViewId image_view_id : image_view_ids) {
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
slot_image_views.erase(image_view_id);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 6835fd747..4efe042b6 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset);
+ gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth);