path: root/src/video_core/buffer_cache/buffer_cache.h
 src/video_core/buffer_cache/buffer_cache.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 58 insertions(+), 13 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index fa26eb8b0..3f2bf6294 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -11,7 +11,6 @@
#include <mutex>
#include <numeric>
#include <span>
-#include <unordered_map>
#include <vector>
#include <boost/container/small_vector.hpp>
@@ -22,7 +21,6 @@
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
-#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
@@ -78,8 +76,9 @@ class BufferCache {
static constexpr BufferId NULL_BUFFER_ID{0};
- static constexpr u64 EXPECTED_MEMORY = 512_MiB;
- static constexpr u64 CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
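For reference: the 512_MiB, 1_GiB and 4_GiB tokens are user-defined byte-size literals pulled in through "common/literals.h", which is already included above. A minimal sketch of how such literals work; the exact namespace and spelling in the real header are not shown in this diff, so treat the definitions below as illustrative only:

    #include <cstdint>

    // Byte-size literals: 1_MiB = 1024 * 1024 bytes, 1_GiB = 1024 MiB.
    constexpr std::int64_t operator""_MiB(unsigned long long n) {
        return static_cast<std::int64_t>(n) * 1024 * 1024;
    }
    constexpr std::int64_t operator""_GiB(unsigned long long n) {
        return static_cast<std::int64_t>(n) * 1024 * 1024 * 1024;
    }

    static_assert(512_MiB == 536'870'912);
    static_assert(4_GiB == 4'294'967'296);

The switch from u64 to s64 for these constants is presumably because the constructor below subtracts vacancy margins from the device size, and those intermediate values must be allowed to go negative on small GPUs.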
@@ -438,6 +437,8 @@ private:
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
u64 frame_tick = 0;
u64 total_used_memory = 0;
+ u64 minimum_memory = 0;
+ u64 critical_memory = 0;
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
};
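The page_table context line above is the flat GPU-virtual-address to BufferId lookup table covering the full 39-bit GPU address space. Assuming PAGE_BITS is 16 (64 KiB cache pages; the constant is defined elsewhere in this header, so this is an assumption), the arithmetic works out as follows:

    #include <cstdint>

    constexpr std::uint64_t kGpuVaBits = 39;
    constexpr std::uint64_t kPageBits = 16;  // Assumed value of PAGE_BITS.
    constexpr std::uint64_t kEntries = (1ULL << kGpuVaBits) >> kPageBits;
    static_assert(kEntries == 8'388'608);  // 2^23 pages, one BufferId slot per page.

With a 32-bit BufferId per entry (again an assumption about the slot-id type), the table itself costs 32 MiB of host memory regardless of how many buffers exist.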
@@ -453,11 +454,30 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
+
+ if (!runtime.CanReportMemoryUsage()) {
+ minimum_memory = DEFAULT_EXPECTED_MEMORY;
+ critical_memory = DEFAULT_CRITICAL_MEMORY;
+ return;
+ }
+
+ const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+ const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
+ const s64 min_spacing_critical = device_memory - 1_GiB;
+ const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+ const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
+ const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
+ minimum_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+ DEFAULT_EXPECTED_MEMORY));
+ critical_memory = static_cast<u64>(
+ std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+ DEFAULT_CRITICAL_MEMORY));
}
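To make the new threshold formula concrete, here is a self-contained sketch that evaluates the same expressions for two illustrative device sizes (only the surrounding boilerplate is new; the arithmetic mirrors the constructor above):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::int64_t MiB = 1024 * 1024;
        constexpr std::int64_t GiB = 1024 * MiB;
        for (const std::int64_t device_memory : {2 * GiB, 8 * GiB}) {
            const std::int64_t min_spacing_expected = device_memory - 1 * GiB - 512 * MiB;
            const std::int64_t min_spacing_critical = device_memory - 1 * GiB;
            const std::int64_t mem_threshold = std::min(device_memory, 4 * GiB);
            const std::int64_t min_vacancy_expected = (6 * mem_threshold) / 10;
            const std::int64_t min_vacancy_critical = (3 * mem_threshold) / 10;
            const std::int64_t minimum_memory = std::max(
                std::min(device_memory - min_vacancy_expected, min_spacing_expected), 512 * MiB);
            const std::int64_t critical_memory = std::max(
                std::min(device_memory - min_vacancy_critical, min_spacing_critical), 1 * GiB);
            std::printf("%5lld MiB device -> GC starts at %5lld MiB, aggressive at %5lld MiB\n",
                        static_cast<long long>(device_memory / MiB),
                        static_cast<long long>(minimum_memory / MiB),
                        static_cast<long long>(critical_memory / MiB));
        }
    }

A 2 GiB card lands back on the old 512 MiB / 1 GiB defaults, while an 8 GiB card does not start collecting until roughly 5.6 GiB of buffers and only becomes aggressive around 6.8 GiB, so large GPUs keep far more cached than before.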
template <class P>
void BufferCache<P>::RunGarbageCollector() {
- const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
+ const bool aggressive_gc = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
int num_iterations = aggressive_gc ? 64 : 32;
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
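The body of the collector is outside this hunk; the names of the two knobs above suggest the sweep is bounded both by age (only buffers untouched for ticks_to_destroy frames are candidates) and by work (at most num_iterations evictions per call), so cleanup never stalls a single frame. A self-contained sketch of that pattern, not the cache's actual LRU helper:

    #include <cstdint>
    #include <deque>

    struct Entry {
        std::uint64_t last_used_tick;
        std::uint64_t size_bytes;
    };

    // Evict entries untouched for `ticks_to_destroy` frames, at most `num_iterations` per call.
    std::uint64_t Sweep(std::deque<Entry>& lru_oldest_first, std::uint64_t frame_tick,
                        std::uint64_t ticks_to_destroy, int num_iterations) {
        std::uint64_t freed = 0;
        while (!lru_oldest_first.empty() && num_iterations-- > 0) {
            const Entry& oldest = lru_oldest_first.front();
            if (frame_tick - oldest.last_used_tick < ticks_to_destroy) {
                break;  // Everything behind it in the queue is even more recent.
            }
            freed += oldest.size_bytes;
            lru_oldest_first.pop_front();
        }
        return freed;
    }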
@@ -488,7 +508,11 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
- if (total_used_memory >= EXPECTED_MEMORY) {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
+ if (total_used_memory >= minimum_memory) {
RunGarbageCollector();
}
++frame_tick;
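CanReportMemoryUsage() and GetDeviceMemoryUsage() are provided by the backend runtime and are not part of this diff. On Vulkan the usual way to obtain this number is the VK_EXT_memory_budget extension; whether yuzu's runtime does exactly this is not shown here, so the sketch below is illustrative only:

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Sum the current usage of all device-local heaps, as reported by VK_EXT_memory_budget.
    // Requires the extension to be available; a backend would gate this behind a
    // CanReportMemoryUsage()-style check.
    std::uint64_t QueryDeviceLocalUsage(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
        budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

        VkPhysicalDeviceMemoryProperties2 props{};
        props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
        props.pNext = &budget;
        vkGetPhysicalDeviceMemoryProperties2(physical_device, &props);

        std::uint64_t usage = 0;
        for (std::uint32_t i = 0; i < props.memoryProperties.memoryHeapCount; ++i) {
            if (props.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
                usage += budget.heapUsage[i];
            }
        }
        return usage;
    }

Backends that cannot report real usage simply keep the old estimate-plus-default-threshold behaviour.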
@@ -1287,7 +1311,20 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
- const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ if (address_size >= 64_MiB) {
+ // Reported vertex buffer size is very large, cap to mapped buffer size
+ GPUVAddr submapped_addr_end = gpu_addr_begin;
+
+ const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
+ if (ranges.size() > 0) {
+ const auto& [addr, size] = *ranges.begin();
+ submapped_addr_end = addr + size;
+ }
+
+ address_size =
+ std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
+ }
const u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
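The guest can program a vertex array limit far larger than what is actually mapped, and the old code sized the host buffer from that raw difference. The new path asks the GPU memory manager for the sub-ranges that are really mapped and caps the size at the end of the first one. A standalone sketch of that capping step, with GetSubmappedRange() stood in by a plain vector of (address, size) pairs:

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    using GPUVAddr = std::uint64_t;

    // Cap a reported vertex-buffer size to the end of the first mapped sub-range
    // starting at `begin`. An empty range list degenerates to size 0, which the
    // caller then treats as a null binding.
    std::uint32_t CapToMapped(GPUVAddr begin, std::uint32_t reported_size,
                              const std::vector<std::pair<GPUVAddr, std::uint64_t>>& submapped) {
        GPUVAddr mapped_end = begin;
        if (!submapped.empty()) {
            const auto& [addr, size] = submapped.front();
            mapped_end = addr + size;
        }
        return static_cast<std::uint32_t>(
            std::min<std::uint64_t>(reported_size, mapped_end - begin));
    }

For example, a limit register describing 256 MiB past the start address with only 4 MiB actually mapped there now yields a 4 MiB binding instead of a 256 MiB host allocation; the 64_MiB check above keeps the extra lookup off the common small-buffer path.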
@@ -1469,19 +1506,27 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
overlap_ids.push_back(overlap_id);
overlap.Pick();
const VAddr overlap_cpu_addr = overlap.CpuAddr();
- if (overlap_cpu_addr < begin) {
+ const bool expands_left = overlap_cpu_addr < begin;
+ if (expands_left) {
cpu_addr = begin = overlap_cpu_addr;
}
- end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
-
+ const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes();
+ const bool expands_right = overlap_end > end;
+ if (overlap_end > end) {
+ end = overlap_end;
+ }
stream_score += overlap.StreamScore();
if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
- begin -= PAGE_SIZE * 256;
- cpu_addr = begin;
- end += PAGE_SIZE * 256;
+ if (expands_right) {
+ begin -= PAGE_SIZE * 256;
+ cpu_addr = begin;
+ }
+ if (expands_left) {
+ end += PAGE_SIZE * 256;
+ }
}
}
return OverlapResult{
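The stream-leap change above makes the extra padding conditional on how the region grew during this resolve, instead of always padding both ends of the joined buffer. Using the same assumed 64 KiB cache page size as noted earlier (PAGE_BITS == 16, defined elsewhere in this header), the headroom added per direction works out to:

    #include <cstdint>

    constexpr std::uint64_t kAssumedPageSize = 1ULL << 16;   // Assumption about PAGE_SIZE.
    constexpr std::uint64_t kStreamLeapBytes = kAssumedPageSize * 256;
    static_assert(kStreamLeapBytes == 16ULL * 1024 * 1024);  // 16 MiB of slack per growth direction.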