From 6f7cb69c94bef0795f054d881e061745f69d1eda Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Mon, 29 May 2023 00:35:51 +0100 Subject: Use spans over guest memory where possible instead of copying data. --- src/core/core_timing.cpp | 3 +- src/core/core_timing.h | 2 +- src/core/hle/service/hle_ipc.cpp | 32 ++++-- src/core/memory.cpp | 54 +++++++++- src/core/memory.h | 212 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 285 insertions(+), 18 deletions(-) (limited to 'src/core') diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index e6112a3c9..b98a0cb33 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { -> std::optional { return std::nullopt; }; ev_lost = CreateEvent("_lost_event", empty_timed_callback); if (is_multicore) { - timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); + timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } } @@ -255,7 +255,6 @@ void CoreTiming::ThreadLoop() { #ifdef _WIN32 while (!paused && !event.IsSet() && wait_time > 0) { wait_time = *next_time - GetGlobalTimeNs().count(); - if (wait_time >= timer_resolution_ns) { Common::Windows::SleepForOneTick(); } else { diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 5bca1c78d..c20e906fb 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -163,7 +163,7 @@ private: Common::Event pause_event{}; std::mutex basic_lock; std::mutex advance_lock; - std::unique_ptr timer_thread; + std::unique_ptr timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index 2290df705..f6a1e54f2 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp @@ -329,8 +329,22 @@ std::vector HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons } std::span HLERequestContext::ReadBuffer(std::size_t buffer_index) const { - static thread_local std::array, 2> read_buffer_a; - static thread_local std::array, 2> read_buffer_x; + static thread_local std::array read_buffer_a{ + Core::Memory::CpuGuestMemory(memory, 0, 0), + Core::Memory::CpuGuestMemory(memory, 0, 0), + }; + static thread_local std::array read_buffer_data_a{ + Common::ScratchBuffer(), + Common::ScratchBuffer(), + }; + static thread_local std::array read_buffer_x{ + Core::Memory::CpuGuestMemory(memory, 0, 0), + Core::Memory::CpuGuestMemory(memory, 0, 0), + }; + static thread_local std::array read_buffer_data_x{ + Common::ScratchBuffer(), + Common::ScratchBuffer(), + }; const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; @@ -339,19 +353,17 @@ std::span HLERequestContext::ReadBuffer(std::size_t buffer_index) cons BufferDescriptorA().size() > buffer_index, { return {}; }, "BufferDescriptorA invalid buffer_index {}", buffer_index); auto& read_buffer = read_buffer_a[buffer_index]; - read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size()); - memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(), - read_buffer.size()); - return read_buffer; + return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), + BufferDescriptorA()[buffer_index].Size(), + &read_buffer_data_a[buffer_index]); } else { ASSERT_OR_EXECUTE_MSG( BufferDescriptorX().size() > buffer_index, { return {}; }, "BufferDescriptorX invalid buffer_index {}", buffer_index); auto& read_buffer = read_buffer_x[buffer_index]; - read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size()); - memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(), - read_buffer.size()); - return read_buffer; + return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), + BufferDescriptorX()[buffer_index].Size(), + &read_buffer_data_x[buffer_index]); } } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 257406f09..805963178 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -266,6 +266,22 @@ struct Memory::Impl { ReadBlockImpl(*system.ApplicationProcess(), src_addr, dest_buffer, size); } + const u8* GetSpan(const VAddr src_addr, const std::size_t size) const { + if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == + current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { + return GetPointerSilent(src_addr); + } + return nullptr; + } + + u8* GetSpan(const VAddr src_addr, const std::size_t size) { + if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == + current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { + return GetPointerSilent(src_addr); + } + return nullptr; + } + template void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, const void* src_buffer, const std::size_t size) { @@ -559,7 +575,7 @@ struct Memory::Impl { } } - const Common::ProcessAddress end = base + size; + const auto end = base + size; ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); @@ -570,14 +586,18 @@ struct Memory::Impl { while (base != end) { page_table.pointers[base].Store(nullptr, type); page_table.backing_addr[base] = 0; - + page_table.blocks[base] = 0; base += 1; } } else { + auto orig_base = base; while (base != end) { - page_table.pointers[base].Store( - system.DeviceMemory().GetPointer(target) - (base << YUZU_PAGEBITS), type); - page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS); + auto host_ptr = + system.DeviceMemory().GetPointer(target) - (base << YUZU_PAGEBITS); + auto backing = GetInteger(target) - (base << YUZU_PAGEBITS); + page_table.pointers[base].Store(host_ptr, type); + page_table.backing_addr[base] = backing; + page_table.blocks[base] = orig_base << YUZU_PAGEBITS; ASSERT_MSG(page_table.pointers[base].Pointer(), "memory mapping base yield a nullptr within the table"); @@ -747,6 +767,14 @@ struct Memory::Impl { VAddr last_address; }; + void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { + system.GPU().InvalidateRegion(GetInteger(dest_addr), size); + } + + void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { + system.GPU().FlushRegion(GetInteger(dest_addr), size); + } + Core::System& system; Common::PageTable* current_page_table = nullptr; std::array @@ -881,6 +909,14 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b impl->ReadBlockUnsafe(src_addr, dest_buffer, size); } +const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const { + return impl->GetSpan(src_addr, size); +} + +u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) { + return impl->GetSpan(src_addr, size); +} + void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, const std::size_t size) { impl->WriteBlock(dest_addr, src_buffer, size); @@ -924,4 +960,12 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) impl->MarkRegionDebug(GetInteger(vaddr), size, debug); } +void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { + impl->InvalidateRegion(dest_addr, size); +} + +void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { + impl->FlushRegion(dest_addr, size); +} + } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index ea01824f8..ea33c769c 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -5,8 +5,12 @@ #include #include +#include #include #include +#include + +#include "common/scratch_buffer.h" #include "common/typed_address.h" #include "core/hle/result.h" @@ -24,6 +28,10 @@ class PhysicalMemory; class KProcess; } // namespace Kernel +namespace Tegra { +class MemoryManager; +} + namespace Core::Memory { /** @@ -343,6 +351,9 @@ public: */ void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); + const u8* GetSpan(const VAddr src_addr, const std::size_t size) const; + u8* GetSpan(const VAddr src_addr, const std::size_t size); + /** * Writes a range of bytes into the current process' address space at the specified * virtual address. @@ -461,6 +472,8 @@ public: void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void SetGPUDirtyManagers(std::span managers); + void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); + void FlushRegion(Common::ProcessAddress dest_addr, size_t size); private: Core::System& system; @@ -469,4 +482,203 @@ private: std::unique_ptr impl; }; +enum GuestMemoryFlags : u32 { + Read = 1 << 0, + Write = 1 << 1, + Safe = 1 << 2, + Cached = 1 << 3, + + SafeRead = Read | Safe, + SafeWrite = Write | Safe, + SafeReadWrite = SafeRead | SafeWrite, + SafeReadCachedWrite = SafeReadWrite | Cached, + + UnsafeRead = Read, + UnsafeWrite = Write, + UnsafeReadWrite = UnsafeRead | UnsafeWrite, + UnsafeReadCachedWrite = UnsafeReadWrite | Cached, +}; + +namespace { +template +class GuestMemory { + using iterator = T*; + using const_iterator = const T*; + using value_type = T; + using element_type = T; + using iterator_category = std::contiguous_iterator_tag; + +public: + GuestMemory() = delete; + explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_, + Common::ScratchBuffer* backup = nullptr) + : memory{memory_}, addr{addr_}, size{size_} { + static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); + if constexpr (FLAGS & GuestMemoryFlags::Read) { + Read(addr, size, backup); + } + } + + ~GuestMemory() = default; + + T* data() noexcept { + return data_span.data(); + } + + const T* data() const noexcept { + return data_span.data(); + } + + [[nodiscard]] T* begin() noexcept { + return data(); + } + + [[nodiscard]] const T* begin() const noexcept { + return data(); + } + + [[nodiscard]] T* end() noexcept { + return data() + size; + } + + [[nodiscard]] const T* end() const noexcept { + return data() + size; + } + + T& operator[](size_t index) noexcept { + return data_span[index]; + } + + const T& operator[](size_t index) const noexcept { + return data_span[index]; + } + + void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept { + addr = addr_; + size = size_; + addr_changed = true; + } + + std::span Read(u64 addr_, std::size_t size_, + Common::ScratchBuffer* backup = nullptr) noexcept { + addr = addr_; + size = size_; + if (size == 0) { + is_data_copy = true; + return {}; + } + + if (TrySetSpan()) { + if constexpr (FLAGS & GuestMemoryFlags::Safe) { + memory.FlushRegion(addr, size * sizeof(T)); + } + } else { + if (backup) { + backup->resize_destructive(size); + data_span = *backup; + } else { + data_copy.resize(size); + data_span = std::span(data_copy); + } + is_data_copy = true; + span_valid = true; + if constexpr (FLAGS & GuestMemoryFlags::Safe) { + memory.ReadBlock(addr, data_span.data(), size * sizeof(T)); + } else { + memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T)); + } + } + return data_span; + } + + void Write(std::span write_data) noexcept { + if constexpr (FLAGS & GuestMemoryFlags::Cached) { + memory.WriteBlockCached(addr, write_data.data(), size * sizeof(T)); + } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { + memory.WriteBlock(addr, write_data.data(), size * sizeof(T)); + } else { + memory.WriteBlockUnsafe(addr, write_data.data(), size * sizeof(T)); + } + } + + bool TrySetSpan() noexcept { + if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) { + data_span = {reinterpret_cast(ptr), size}; + span_valid = true; + return true; + } + return false; + } + +protected: + bool IsDataCopy() const noexcept { + return is_data_copy; + } + + bool AddressChanged() const noexcept { + return addr_changed; + } + + M& memory; + u64 addr; + size_t size; + std::span data_span{}; + std::vector data_copy; + bool span_valid{false}; + bool is_data_copy{false}; + bool addr_changed{false}; +}; + +template +class GuestMemoryScoped : public GuestMemory { +public: + GuestMemoryScoped() = delete; + explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_, + Common::ScratchBuffer* backup = nullptr) + : GuestMemory(memory_, addr_, size_, backup) { + if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { + if (!this->TrySetSpan()) { + if (backup) { + this->data_span = *backup; + this->span_valid = true; + this->is_data_copy = true; + } + } + } + } + + ~GuestMemoryScoped() { + if constexpr (FLAGS & GuestMemoryFlags::Write) { + if (this->size == 0) [[unlikely]] { + return; + } + + if (this->AddressChanged() || this->IsDataCopy()) { + ASSERT(this->span_valid); + if constexpr (FLAGS & GuestMemoryFlags::Cached) { + this->memory.WriteBlockCached(this->addr, this->data_span.data(), + this->size * sizeof(T)); + } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { + this->memory.WriteBlock(this->addr, this->data_span.data(), + this->size * sizeof(T)); + } else { + this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(), + this->size * sizeof(T)); + } + } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { + this->memory.InvalidateRegion(this->addr, this->size * sizeof(T)); + } + } + } +}; +} // namespace + +template +using CpuGuestMemory = GuestMemory; +template +using CpuGuestMemoryScoped = GuestMemoryScoped; +template +using GpuGuestMemory = GuestMemory; +template +using GpuGuestMemoryScoped = GuestMemoryScoped; } // namespace Core::Memory -- cgit v1.2.3