From 93ac5a6a6d316966c1d288f8b83610bb48143a04 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 21 Oct 2022 01:46:51 +0200 Subject: MacroHLE: Add Index Buffer size estimation. --- src/video_core/memory_manager.cpp | 53 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8c8dfcca6..8f6c51045 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -325,9 +325,15 @@ template ; - static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v; - static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v; + using FuncMappedReturn = + typename std::invoke_result::type; + using FuncReservedReturn = + typename std::invoke_result::type; + using FuncUnmappedReturn = + typename std::invoke_result::type; + static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v; + static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v; + static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v; u64 used_page_size; u64 used_page_mask; u64 used_page_bits; @@ -571,6 +577,47 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { return range_so_far; } +size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { + PTEKind base_kind = GetPageKind(gpu_addr); + if (base_kind == PTEKind::INVALID) { + return 0; + } + size_t range_so_far = 0; + bool result{false}; + auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, + std::size_t copy_amount) { + result = true; + return true; + }; + auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + PTEKind base_kind_other = GetKind((page_index << page_bits) + offset); + if (base_kind != base_kind_other) { + result = true; + return true; + } + range_so_far += copy_amount; + return false; + }; + auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + PTEKind base_kind_other = GetKind((page_index << big_page_bits) + offset); + if (base_kind != base_kind_other) { + result = true; + return true; + } + range_so_far += copy_amount; + return false; + }; + auto check_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, short_check, fail, fail); + return result; + }; + MemoryOperation(gpu_addr, address_space_size - gpu_addr, big_check, fail, + check_short_pages); + return range_so_far; +} + void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, -- cgit v1.2.3 From aad0cbf024fb8077a9b375a093c60a7e2ab1db3d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 9 Nov 2022 17:58:10 +0100 Subject: MacroHLE: Add HLE replacement for base vertex and base instance. --- src/video_core/memory_manager.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8f6c51045..11e7d225e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { return range_so_far; } -size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { +size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { PTEKind base_kind = GetPageKind(gpu_addr); if (base_kind == PTEKind::INVALID) { return 0; @@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { return true; } range_so_far += copy_amount; + if (range_so_far >= max_size) { + result = true; + return true; + } return false; }; auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { return true; } range_so_far += copy_amount; + if (range_so_far >= max_size) { + result = true; + return true; + } return false; }; auto check_short_pages = [&](std::size_t page_index, std::size_t offset, -- cgit v1.2.3 From 18637766efd1ff9a0c22967553983cfda69c96ca Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Nov 2022 16:36:53 +0100 Subject: MacroHLE: Reduce massive calculations on sizing estimation. --- src/video_core/memory_manager.cpp | 91 +++------------------------------------ 1 file changed, 6 insertions(+), 85 deletions(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 11e7d225e..4fcae9909 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -25,7 +25,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits != big_page_bits ? page_bits : 0}, - unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} { + kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( + 1, std::memory_order_acq_rel)} { address_space_size = 1ULL << address_space_bits; page_size = 1ULL << page_bits; page_mask = page_size - 1ULL; @@ -41,11 +42,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_entries.resize(big_page_table_size / 32, 0); big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); - std::array kind_valus; - kind_valus.fill(PTEKind::INVALID); - big_kinds.resize(big_page_table_size / 32, kind_valus); entries.resize(page_table_size / 32, 0); - kinds.resize(page_table_size / 32, kind_valus); } MemoryManager::~MemoryManager() = default; @@ -83,38 +80,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { } PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { - auto entry = GetEntry(gpu_addr); - if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] { - return GetKind(gpu_addr); - } else { - return GetKind(gpu_addr); - } -} - -template -PTEKind MemoryManager::GetKind(size_t position) const { - if constexpr (is_big_page) { - position = position >> big_page_bits; - const size_t sub_index = position % 32; - return big_kinds[position / 32][sub_index]; - } else { - position = position >> page_bits; - const size_t sub_index = position % 32; - return kinds[position / 32][sub_index]; - } -} - -template -void MemoryManager::SetKind(size_t position, PTEKind kind) { - if constexpr (is_big_page) { - position = position >> big_page_bits; - const size_t sub_index = position % 32; - big_kinds[position / 32][sub_index] = kind; - } else { - position = position >> page_bits; - const size_t sub_index = position % 32; - kinds[position / 32][sub_index] = kind; - } + return kind_map.GetValueAt(gpu_addr); } inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const { @@ -141,7 +107,6 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp const GPUVAddr current_gpu_addr = gpu_addr + offset; [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); - SetKind(current_gpu_addr, kind); if (current_entry_type != entry_type) { rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); } @@ -153,6 +118,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp } remaining_size -= page_size; } + kind_map.Map(gpu_addr, gpu_addr + size, kind); return gpu_addr; } @@ -164,7 +130,6 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr const GPUVAddr current_gpu_addr = gpu_addr + offset; [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); - SetKind(current_gpu_addr, kind); if (current_entry_type != entry_type) { rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); } @@ -193,6 +158,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr } remaining_size -= big_page_size; } + kind_map.Map(gpu_addr, gpu_addr + size, kind); return gpu_addr; } @@ -578,52 +544,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { } size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { - PTEKind base_kind = GetPageKind(gpu_addr); - if (base_kind == PTEKind::INVALID) { - return 0; - } - size_t range_so_far = 0; - bool result{false}; - auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, - std::size_t copy_amount) { - result = true; - return true; - }; - auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { - PTEKind base_kind_other = GetKind((page_index << page_bits) + offset); - if (base_kind != base_kind_other) { - result = true; - return true; - } - range_so_far += copy_amount; - if (range_so_far >= max_size) { - result = true; - return true; - } - return false; - }; - auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { - PTEKind base_kind_other = GetKind((page_index << big_page_bits) + offset); - if (base_kind != base_kind_other) { - result = true; - return true; - } - range_so_far += copy_amount; - if (range_so_far >= max_size) { - result = true; - return true; - } - return false; - }; - auto check_short_pages = [&](std::size_t page_index, std::size_t offset, - std::size_t copy_amount) { - GPUVAddr base = (page_index << big_page_bits) + offset; - MemoryOperation(base, copy_amount, short_check, fail, fail); - return result; - }; - MemoryOperation(gpu_addr, address_space_size - gpu_addr, big_check, fail, - check_short_pages); - return range_so_far; + return kind_map.GetContinousSizeFrom(gpu_addr); } void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { -- cgit v1.2.3 From 3630bfaef332768e08ecc0c34cd4bca83a2579f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Nov 2022 03:07:14 +0100 Subject: RasterizerMemory: Add filtering for flushing/invalidation operations. --- src/video_core/memory_manager.cpp | 60 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 27 deletions(-) (limited to 'src/video_core/memory_manager.cpp') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 4fcae9909..3a5cdeb39 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -356,8 +356,8 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } template -void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, - std::size_t size) const { +void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { std::memset(dest_buffer, 0, copy_amount); @@ -367,7 +367,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(dest_buffer, physical, copy_amount); @@ -377,7 +377,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); @@ -395,18 +395,19 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, MemoryOperation(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages); } -void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which) const { + ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); + ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template -void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, - std::size_t size) { +void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) { auto just_advance = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { src_buffer = static_cast(src_buffer) + copy_amount; @@ -415,7 +416,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(physical, src_buffer, copy_amount); @@ -425,7 +426,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); @@ -443,16 +444,18 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe MemoryOperation(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages); } -void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which) { + WriteBlockImpl(gpu_dest_addr, src_buffer, size, which); } void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); + WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } -void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -460,12 +463,12 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -475,7 +478,8 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); } -bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { +bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { bool result = false; auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -484,13 +488,13 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto check_short_pages = [&](std::size_t page_index, std::size_t offset, @@ -547,7 +551,8 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) co return kind_map.GetContinousSizeFrom(gpu_addr); } -void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -555,12 +560,12 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -570,14 +575,15 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages); } -void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, + VideoCommon::CacheType which) { std::vector tmp_buffer(size); - ReadBlock(gpu_src_addr, tmp_buffer.data(), size); + ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. // Fixes NPC geometry in Zombie Panic in Wonderland DX - FlushRegion(gpu_dest_addr, size); - WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); + FlushRegion(gpu_dest_addr, size, which); + WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { -- cgit v1.2.3