From a16c2611316e534bda310f99319f4e8c74c49c92 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Nov 2022 00:09:56 +0100 Subject: Buffer Cache: Fully rework the buffer cache. --- src/video_core/buffer_cache/memory_tracker_base.h | 258 ++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 src/video_core/buffer_cache/memory_tracker_base.h (limited to 'src/video_core/buffer_cache/memory_tracker_base.h') diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h new file mode 100644 index 000000000..93bd779c9 --- /dev/null +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -0,0 +1,258 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "video_core/buffer_cache/word_manager.h" + +namespace VideoCommon { + +template +class MemoryTrackerBase { + static constexpr size_t MAX_CPU_PAGE_BITS = 39; + static constexpr size_t HIGHER_PAGE_BITS = 22; + static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; + static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; + static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr size_t MANAGER_POOL_SIZE = 32; + static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + using Manager = WordManager; + +public: + MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} + ~MemoryTrackerBase() = default; + + /// Returns the inclusive CPU modified range in a begin end pair + [[nodiscard]] std::pair ModifiedCpuRegion(VAddr query_cpu_addr, + u64 query_size) noexcept { + return IteratePairs(query_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + return manager->ModifiedRegion(offset, size); + }); + } + + /// Returns the inclusive GPU modified range in a begin end pair + [[nodiscard]] std::pair ModifiedGpuRegion(VAddr query_cpu_addr, + u64 query_size) noexcept { + return IteratePairs(query_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + return manager->ModifiedRegion(offset, size); + }); + } + + /// Returns true if a region has been modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages(query_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + return manager->IsRegionModified(offset, size); + }); + } + + /// Returns true if a region has been modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + return IteratePages(query_cpu_addr, query_size, + [](Manager* manager, u64 offset, size_t size) { + return manager->IsRegionModified(offset, size); + }); + } + + /// Mark region as CPU modified, notifying the rasterizer about this change + void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages( + dirty_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + manager->ChangeRegionState(manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as CPU modified, notifying the rasterizer about this change + void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages( + dirty_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + manager->ChangeRegionState(manager->GetCpuAddr() + offset, size); + }); + } + + /// Mark region as modified from the host GPU + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages( + dirty_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + manager->ChangeRegionState(manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark region as modified from the host GPU + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages( + dirty_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + manager->ChangeRegionState(manager->GetCpuAddr() + offset, size); + }); + } + + /// Mark region as modified from the CPU + /// but don't mark it as modified until FlusHCachedWrites is called. + void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages( + dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) { + const VAddr cpu_address = manager->GetCpuAddr() + offset; + manager->ChangeRegionState(cpu_address, size); + cached_pages.insert(static_cast(cpu_address >> HIGHER_PAGE_BITS)); + }); + } + + /// Flushes cached CPU writes, and notify the rasterizer about the deltas + void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { + IteratePages(query_cpu_addr, query_size, + [](Manager* manager, [[maybe_unused]] u64 offset, + [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); }); + } + + void FlushCachedWrites() noexcept { + for (auto id : cached_pages) { + top_tier[id]->FlushCachedWrites(); + } + cached_pages.clear(); + } + + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + manager->ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, true, func); + }); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func, clear](Manager* manager, u64 offset, size_t size) { + manager->ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, clear, func); + }); + } + + template + void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) { + IteratePages(query_cpu_range, query_size, + [&func](Manager* manager, u64 offset, size_t size) { + manager->ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, true, func); + }); + } + +private: + template + bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + std::size_t remaining_size{size}; + std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + while (remaining_size > 0) { + const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + auto* manager{top_tier[page_index]}; + if (manager) { + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } else if constexpr (create_region_on_fail) { + CreateRegion(page_index); + manager = top_tier[page_index]; + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } + page_index++; + page_offset = 0; + remaining_size -= copy_amount; + } + return false; + } + + template + std::pair IteratePairs(VAddr cpu_address, size_t size, Func&& func) { + std::size_t remaining_size{size}; + std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + u64 begin = std::numeric_limits::max(); + u64 end = 0; + while (remaining_size > 0) { + const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + auto* manager{top_tier[page_index]}; + const auto execute = [&] { + auto [new_begin, new_end] = func(manager, page_offset, copy_amount); + if (new_begin != 0 || new_end != 0) { + const u64 base_address = page_index << HIGHER_PAGE_BITS; + begin = std::min(new_begin + base_address, begin); + end = std::max(new_end + base_address, end); + } + }; + if (manager) { + execute(); + } else if constexpr (create_region_on_fail) { + CreateRegion(page_index); + manager = top_tier[page_index]; + execute(); + } + page_index++; + page_offset = 0; + remaining_size -= copy_amount; + } + return begin < end ? std::make_pair(begin, end) : std::make_pair(0ULL, 0ULL); + } + + void CreateRegion(std::size_t page_index) { + const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; + top_tier[page_index] = GetNewManager(base_cpu_addr); + } + + Manager* GetNewManager(VAddr base_cpu_addess) { + const auto on_return = [&] { + auto* new_manager = free_managers.front(); + new_manager->SetCpuAddress(base_cpu_addess); + free_managers.pop_front(); + return new_manager; + }; + if (!free_managers.empty()) { + return on_return(); + } + manager_pool.emplace_back(); + auto& last_pool = manager_pool.back(); + for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { + new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); + free_managers.push_back(&last_pool[i]); + } + return on_return(); + } + + std::deque> manager_pool; + std::deque free_managers; + + std::array top_tier{}; + + std::unordered_set cached_pages; + + RasterizerInterface* rasterizer = nullptr; +}; + +} // namespace VideoCommon -- cgit v1.2.3