summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2021-01-20 01:59:53 +0100
committer: Fernando Sahmkow <fsahmkow27@gmail.com> 2021-06-16 21:35:02 +0200
commit: a11bc4a382ebca52bdf0aab1a9474351e8d85cef (patch)
tree: 6392fde60f5ee2e414733a193329e18d7f7fde42
parent: vulkan_memory_allocator: Release allocations with no commits (diff)
download: yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.gz
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.bz2
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.lz
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.xz
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.zst
yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.zip
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h | 11
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 138
-rw-r--r-- src/video_core/texture_cache/image_base.cpp | 17
-rw-r--r-- src/video_core/texture_cache/image_base.h | 2
-rw-r--r-- src/video_core/texture_cache/slot_vector.h | 70
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 44
6 files changed, 226 insertions, 56 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index a39505903..b121d36a3 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -256,6 +256,16 @@ public:
stream_score += score;
}
+ /// Stores new_frame_tick as this buffer's frame tick, replacing the previous value
+ void SetFrameTick(u64 new_frame_tick) noexcept {
+ frame_tick = new_frame_tick;
+ }
+
+ /// Returns the frame tick most recently stored with SetFrameTick
+ [[nodiscard]] u64 FrameTick() const noexcept {
+ return frame_tick;
+ }
+
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
@@ -586,6 +596,7 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
Words words;
+ u64 frame_tick = 0;
BufferFlagBits flags{};
int stream_score = 0;
};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d371b842f..ecb7d3dee 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -243,6 +243,8 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
+ void TouchBuffer(Buffer& buffer) const noexcept;
+
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
@@ -255,6 +257,10 @@ private:
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
+ /// Downloads the buffer over its whole range (forwards its CpuAddr() and SizeBytes())
+ void DownloadBufferMemory(Buffer& buffer);
+
+ /// Downloads the ranges ForEachDownloadRange reports for the given cpu_addr and size
+ void DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size);
+
void DeleteBuffer(BufferId buffer_id);
void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
@@ -319,6 +325,9 @@ private:
size_t immediate_buffer_capacity = 0;
std::unique_ptr<u8[]> immediate_buffer_alloc;
+ typename SlotVector<Buffer>::Iterator deletion_iterator;
+ u64 frame_tick = 0;
+
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
};
@@ -332,6 +341,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
+ deletion_iterator = slot_buffers.end();
}
template <class P>
@@ -349,7 +359,24 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
+ static constexpr u64 ticks_to_destroy = 120;
+ int num_iterations = 32;
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_buffers.end()) {
+ deletion_iterator = slot_buffers.begin();
+ }
+ ++deletion_iterator;
+ if (deletion_iterator == slot_buffers.end()) {
+ break;
+ }
+ const auto [buffer_id, buffer] = *deletion_iterator;
+ if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
+ DownloadBufferMemory(*buffer);
+ DeleteBuffer(buffer_id);
+ }
+ }
delayed_destruction_ring.Tick();
+ ++frame_tick;
}
template <class P>
@@ -371,50 +398,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
- ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
- boost::container::small_vector<BufferCopy, 1> copies;
- u64 total_size_bytes = 0;
- u64 largest_copy = 0;
- buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
- copies.push_back(BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- });
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
- });
- if (total_size_bytes == 0) {
- return;
- }
- MICROPROFILE_SCOPE(GPU_DownloadMemory);
-
- if constexpr (USE_MEMORY_MAPS) {
- auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
- const u8* const mapped_memory = download_staging.mapped_span.data();
- const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
- for (BufferCopy& copy : copies) {
- // Modify copies to have the staging offset in mind
- copy.dst_offset += download_staging.offset;
- }
- runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
- runtime.Finish();
- for (const BufferCopy& copy : copies) {
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- // Undo the modified offset
- const u64 dst_offset = copy.dst_offset - download_staging.offset;
- const u8* copy_mapped_memory = mapped_memory + dst_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
- }
- } else {
- const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
- for (const BufferCopy& copy : copies) {
- buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
- }
- }
- });
+ ForEachBufferInRange(cpu_addr, size,
+ [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
}
template <class P>
@@ -640,6 +625,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ TouchBuffer(buffer);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -658,6 +644,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -693,6 +680,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const VAddr cpu_addr = binding.cpu_addr;
const u32 size = binding.size;
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= uniform_buffer_skip_cache_size &&
!buffer.IsRegionGpuModified(cpu_addr, size);
@@ -744,6 +732,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = storage_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -766,6 +755,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = transform_feedback_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -784,6 +774,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -803,6 +794,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = compute_storage_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -1101,6 +1093,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ TouchBuffer(slot_buffers[new_buffer_id]);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1136,6 +1129,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
template <class P>
+void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
+ // Stamp the buffer with the cache's current frame_tick so its last use can be compared later
+ buffer.SetFrameTick(frame_tick);
+}
+
+template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
if (buffer.CpuAddr() == 0) {
return true;
@@ -1212,6 +1210,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
}
template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
+ // Whole-buffer variant: forward the buffer's own base address and byte size
+ DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
+}
+
+template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
+ // Build one copy per range reported by ForEachDownloadRange. Destination offsets are packed
+ // back to back, so total_size_bytes ends up as the sum of all range sizes.
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ // No range was reported: nothing to download
+ if (total_size_bytes == 0) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ // Staging path: all ranges go through one staging allocation in a single CopyBuffer call;
+ // runtime.Finish() is called before the mapped memory is read back.
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ for (BufferCopy& copy : copies) {
+ // Modify copies to have the staging offset in mind
+ copy.dst_offset += download_staging.offset;
+ }
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* copy_mapped_memory = mapped_memory + dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+ }
+ } else {
+ // Immediate path: a single buffer sized for the largest range is reused for every range,
+ // each one being downloaded and then written to cpu_memory before the next.
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
+ }
+ }
+}
+
+template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
const auto scalar_replace = [buffer_id](Binding& binding) {
if (binding.buffer_id == buffer_id) {
@@ -1236,6 +1285,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
Unregister(buffer_id);
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+ slot_buffers.erase(buffer_id);
NotifyBufferDeletion();
}
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 9914926b3..bd0e7e64e 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -113,6 +113,23 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
image_view_ids.push_back(image_view_id);
}
+/// Returns true only when the image has the GpuModified flag, lacks the CpuModified flag and
+/// has at most one sample; the multisampled case logs a warning before returning false.
+bool ImageBase::IsSafeDownload() const noexcept {
+ // Skip images that were not modified from the GPU
+ if (False(flags & ImageFlagBits::GpuModified)) {
+ return false;
+ }
+ // Skip images that .are. modified from the CPU
+ // We don't want to write sensitive data from the guest
+ if (True(flags & ImageFlagBits::CpuModified)) {
+ return false;
+ }
+ if (info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ return false;
+ }
+ return true;
+}
+
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index b7f3b7e43..0f69d8a32 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -44,6 +44,8 @@ struct ImageBase {
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+ [[nodiscard]] bool IsSafeDownload() const noexcept;
+
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index eae3be6ea..1259e8263 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <bit>
#include <concepts>
#include <numeric>
#include <type_traits>
@@ -32,6 +33,60 @@ template <class T>
requires std::is_nothrow_move_assignable_v<T>&&
std::is_nothrow_move_constructible_v<T> class SlotVector {
public:
+ /// Forward cursor over the occupied slots of a SlotVector.
+ /// It holds a pointer to the vector plus a SlotId; the end position is SlotId::INVALID_INDEX.
+ /// A default-constructed iterator has a null slot_vector and must be assigned before use.
+ class Iterator {
+ friend SlotVector<T>;
+
+ public:
+ constexpr Iterator() = default;
+
+ /// Advances to the next index whose bit is set in stored_bitset, or to INVALID_INDEX when
+ /// no later bit is set. Does nothing when the index is already outside the bitset.
+ Iterator& operator++() noexcept {
+ const u64* const bitset = slot_vector->stored_bitset.data();
+ const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
+ if (id.index < size) {
+ do {
+ ++id.index;
+ } while (id.index < size && !IsValid(bitset));
+ if (id.index == size) {
+ id.index = SlotId::INVALID_INDEX;
+ }
+ }
+ return *this;
+ }
+
+ /// Post-increment: advances and returns the position held before advancing
+ Iterator operator++(int) noexcept {
+ const Iterator copy{*this};
+ ++*this;
+ return copy;
+ }
+
+ /// Iterators compare by slot index only; the owning vector is not compared
+ bool operator==(const Iterator& other) const noexcept {
+ return id.index == other.id.index;
+ }
+
+ bool operator!=(const Iterator& other) const noexcept {
+ return id.index != other.id.index;
+ }
+
+ /// Returns the current slot id together with a pointer to the element stored there
+ std::pair<SlotId, T*> operator*() const noexcept {
+ return {id, std::addressof((*slot_vector)[id])};
+ }
+
+ T* operator->() const noexcept {
+ return std::addressof((*slot_vector)[id]);
+ }
+
+ private:
+ Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
+ : slot_vector{slot_vector_}, id{id_} {}
+
+ /// Tests the bit for the current index; the caller guarantees the index is inside bitset
+ bool IsValid(const u64* bitset) const noexcept {
+ return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
+ }
+
+ // Null until assigned, so a default-constructed iterator never holds an indeterminate pointer
+ SlotVector<T>* slot_vector = nullptr;
+ SlotId id;
+ };
+
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
@@ -70,6 +125,20 @@ public:
ResetStorageBit(id.index);
}
+ /// Returns an iterator to the lowest occupied slot, or end() when no bit is set in
+ /// stored_bitset.
+ [[nodiscard]] Iterator begin() noexcept {
+ const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
+ if (it == stored_bitset.end()) {
+ return end();
+ }
+ // std::distance(first, last) counts from first to last, so the bitset start goes first;
+ // the reversed order yields a negative value for any word other than the first one.
+ const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
+ const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
+ return Iterator(this, first_id);
+ }
+
+ /// Returns the past-the-end iterator, whose id is SlotId::INVALID_INDEX
+ [[nodiscard]] Iterator end() noexcept {
+ return Iterator(this, SlotId{SlotId::INVALID_INDEX});
+ }
+
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
@@ -140,7 +209,6 @@ private:
Entry* values = nullptr;
size_t values_capacity = 0;
- size_t values_size = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 59b7c678b..45ef155b5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -353,6 +353,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+ typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
@@ -373,10 +374,41 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// This way the null resource becomes a compile time constant
void(slot_image_views.insert(runtime, NullImageParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
+
+ deletion_iterator = slot_images.begin();
}
template <class P>
void TextureCache<P>::TickFrame() {
+ static constexpr u64 ticks_to_destroy = 120;
+ int num_iterations = 32;
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_images.end()) {
+ deletion_iterator = slot_images.begin();
+ if (deletion_iterator == slot_images.end()) {
+ break;
+ }
+ }
+ const auto [image_id, image] = *deletion_iterator;
+ if (image->frame_tick + ticks_to_destroy < frame_tick) {
+ if (image->IsSafeDownload() &&
+ std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) {
+ return slot_images[alias.id].modification_tick > image->modification_tick;
+ })) {
+ auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
+ const auto copies = FullDownloadCopies(image->info);
+ image->DownloadMemory(map, copies);
+ runtime.Finish();
+ SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
+ }
+ if (True(image->flags & ImageFlagBits::Tracked)) {
+ UntrackImage(*image);
+ }
+ UnregisterImage(image_id);
+ DeleteImage(image_id);
+ }
+ ++deletion_iterator;
+ }
// Tick sentenced resources in this order to ensure they are destroyed in the right order
sentenced_images.Tick();
sentenced_framebuffers.Tick();
@@ -568,17 +600,7 @@ template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
- // Skip images that were not modified from the GPU
- if (False(image.flags & ImageFlagBits::GpuModified)) {
- return;
- }
- // Skip images that .are. modified from the CPU
- // We don't want to write sensitive data from the guest
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- if (image.info.num_samples > 1) {
- LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;