From 0a2536a0df1f4aea406f2132d3edda0430acc9d1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Mon, 25 Dec 2023 07:32:16 +0100
Subject: SMMU: Initial adaptation to video_core.

---
 src/core/CMakeLists.txt                            |   2 +
 src/core/core.cpp                                  |   2 +-
 src/core/core.h                                    |   2 +-
 src/core/device_memory_manager.h                   |  43 +++-
 src/core/device_memory_manager.inc                 |  72 ++++++-
 src/core/gpu_dirty_memory_manager.h                |  10 +-
 src/core/guest_memory.h                            | 218 +++++++++++++++++++++
 src/core/hle/service/hle_ipc.cpp                   |  61 ++----
 src/core/hle/service/hle_ipc.h                     |   9 +-
 src/core/hle/service/nvdrv/core/nvmap.cpp          |  64 ++----
 src/core/hle/service/nvdrv/core/nvmap.h            |  19 +-
 .../hle/service/nvdrv/devices/nvdisp_disp0.cpp     |   2 +-
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |  57 +++---
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h |  20 +-
 .../service/nvdrv/devices/nvhost_nvdec_common.cpp  |   8 +-
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |   4 +-
 src/core/hle/service/nvdrv/nvdrv_interface.cpp     |   6 +-
 src/core/memory.cpp                                |  25 ++-
 src/core/memory.h                                  | 205 -------------------
 19 files changed, 441 insertions(+), 388 deletions(-)
 create mode 100644 src/core/guest_memory.h

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 293d9647b..ca54eb6c6 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(core STATIC
     debugger/gdbstub_arch.h
     debugger/gdbstub.cpp
     debugger/gdbstub.h
+    device_memory_manager.h
+    device_memory_manager.inc
     device_memory.cpp
     device_memory.h
     file_sys/fssystem/fs_i_storage.h
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 461eea9c8..04e1f13ff 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -651,7 +651,7 @@ size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
-void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) {
     return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
 }
 
diff --git a/src/core/core.h b/src/core/core.h
index ba5add0dc..20ec2ffff 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -224,7 +224,7 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+    void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
 
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 71b95016c..1a63cbd09 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -3,10 +3,11 @@
 
 #pragma once
 
-#include <deque>
-#include <memory>
 #include <array>
 #include <atomic>
+#include <deque>
+#include <memory>
+#include <mutex>
 
 #include "common/common_types.h"
 #include "common/virtual_buffer.h"
@@ -48,26 +49,54 @@ public:
     template <typename T>
     const T* GetPointer(DAddr address) const;
 
+    DAddr GetAddressFromPAddr(PAddr address) const {
+        DAddr subbits = static_cast<DAddr>(address & page_mask);
+        return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
+    }
+
+    PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
+        PAddr subbits = static_cast<PAddr>(address & page_mask);
+        auto paddr = compressed_physical_ptr[(address >> page_bits)];
+        if (paddr == 0) {
+            return 0;
+        }
+        return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
+    }
+
     template <typename T>
     void Write(DAddr address, T value);
 
     template <typename T>
     T Read(DAddr address) const;
 
+    const u8* GetSpan(const DAddr src_addr, const std::size_t size) const {
+        return nullptr;
+    }
+
+    u8* GetSpan(const DAddr src_addr, const std::size_t size) {
+        return nullptr;
+    }
+
     void ReadBlock(DAddr address, void* dest_pointer, size_t size);
-    void WriteBlock(DAddr address, void* src_pointer, size_t size);
+    void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
+    void WriteBlock(DAddr address, const void* src_pointer, size_t size);
+    void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
 
     size_t RegisterProcess(Memory::Memory* memory);
     void UnregisterProcess(size_t id);
 
     void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
 
+    static constexpr size_t AS_BITS = Traits::device_virtual_bits;
+
 private:
     static constexpr bool supports_pinning = Traits::supports_pinning;
     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
     static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
     static constexpr size_t physical_max_bits = 33;
     static constexpr size_t page_bits = 12;
+    static constexpr size_t page_size = 1ULL << page_bits;
+    static constexpr size_t page_mask = page_size - 1ULL;
     static constexpr u32 physical_address_base = 1U << page_bits;
 
     template <typename T>
@@ -136,11 +165,15 @@ private:
     private:
         std::array<u16, subentries> values{};
     };
-    static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!");
+    static_assert(sizeof(CounterEntry) == subentries * sizeof(u16),
+                  "CounterEntry should be 8 bytes!");
 
-    static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries;
+    static constexpr size_t num_counter_entries =
+        (1ULL << (device_virtual_bits - page_bits)) / subentries;
     using CachedPages = std::array<CounterEntry, num_counter_entries>;
     std::unique_ptr<CachedPages> cached_pages;
+    std::mutex counter_guard;
+    std::mutex mapping_guard;
 };
 
 } // namespace Core
\ No newline at end of file
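The header above pairs two flat tables to make device/physical translation invertible: compressed_physical_ptr maps a device page to a physical page number biased by +1 (so 0 can mean "unmapped"), while compressed_device_addr maps the physical page back to its device page. A minimal standalone model of that encoding (table sizes and addresses here are illustrative assumptions, not the real Traits values):

    #include <cstdint>
    #include <vector>

    constexpr std::size_t page_bits = 12;
    constexpr std::size_t page_mask = (1ULL << page_bits) - 1;

    // device page -> physical page + 1 (0 = unmapped)
    std::vector<uint32_t> compressed_physical_ptr(1 << 16);
    // physical page -> device page
    std::vector<uint32_t> compressed_device_addr(1 << 16);

    void MapPage(uint64_t d_addr, uint64_t p_addr) {
        const uint32_t phys = static_cast<uint32_t>(p_addr >> page_bits) + 1U;
        compressed_physical_ptr[d_addr >> page_bits] = phys;
        compressed_device_addr[phys - 1U] = static_cast<uint32_t>(d_addr >> page_bits);
    }

    // PAddr -> DAddr, mirroring GetAddressFromPAddr() above
    uint64_t DeviceFromPhysical(uint64_t p_addr) {
        return (static_cast<uint64_t>(compressed_device_addr[p_addr >> page_bits]) << page_bits) +
               (p_addr & page_mask);
    }

    // DAddr -> PAddr, mirroring GetPhysicalRawAddressFromDAddr() above
    uint64_t PhysicalFromDevice(uint64_t d_addr) {
        const uint32_t paddr = compressed_physical_ptr[d_addr >> page_bits];
        if (paddr == 0) {
            return 0; // unmapped page
        }
        return (static_cast<uint64_t>(paddr - 1) << page_bits) + (d_addr & page_mask);
    }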
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 77410f72f..8c5f82d31 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -105,7 +105,8 @@ template <typename Traits>
 DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
     : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
-      compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) {
+      compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)),
+      cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
     cached_pages = std::make_unique<CachedPages>();
 }
@@ -144,10 +145,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
     Core::Memory::Memory* process_memory = registered_processes[process_id];
     size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
-    std::atomic_thread_fence(std::memory_order_acquire);
+    std::scoped_lock lk(mapping_guard);
     for (size_t i = 0; i < num_pages; i++) {
         const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
-        auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress));
+        auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
         if (ptr == nullptr) [[unlikely]] {
             compressed_physical_ptr[start_page_d + i] = 0;
             continue;
@@ -157,14 +158,14 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
         InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
     }
-    std::atomic_thread_fence(std::memory_order_release);
 }
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
     size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
-    std::atomic_thread_fence(std::memory_order_acquire);
+    interface->InvalidateRegion(address, size);
+    std::scoped_lock lk(mapping_guard);
     for (size_t i = 0; i < num_pages; i++) {
         auto phys_addr = compressed_physical_ptr[start_page_d + i];
         compressed_physical_ptr[start_page_d + i] = 0;
@@ -173,7 +174,6 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
             compressed_device_addr[phys_addr - 1] = 0;
         }
     }
-    std::atomic_thread_fence(std::memory_order_release);
 }
 
 template <typename Traits>
@@ -256,6 +256,45 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped,
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
+    interface->FlushRegion(address, size);
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+            std::memset(dest_pointer, 0, copy_amount);
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+        },
+        [&](size_t copy_amount, u8* const dst_ptr) {
+            std::memcpy(dst_ptr, src_pointer, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
+        });
+    interface->InvalidateRegion(address, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
     WalkBlock(
         address, size,
         [&](size_t copy_amount, DAddr current_vaddr) {
@@ -274,7 +313,8 @@ void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
 }
 
 template <typename Traits>
-void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) {
+void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
+                                                   size_t size) {
     WalkBlock(
         address, size,
         [&](size_t copy_amount, DAddr current_vaddr) {
@@ -287,7 +327,7 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) {
             std::memcpy(dst_ptr, src_pointer, copy_amount);
         },
         [&](const std::size_t copy_amount) {
-            src_pointer = static_cast<u8*>(src_pointer) + copy_amount;
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
         });
 }
 
@@ -313,6 +353,18 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) {
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
+    bool locked = false;
+    auto lock = [&] {
+        if (!locked) {
+            counter_guard.lock();
+            locked = true;
+        }
+    };
+    SCOPE_EXIT({
+        if (locked) {
+            counter_guard.unlock();
+        }
+    });
     u64 uncache_begin = 0;
     u64 cache_begin = 0;
     u64 uncache_bytes = 0;
@@ -347,6 +399,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
             }
             uncache_bytes += Memory::YUZU_PAGESIZE;
         } else if (uncache_bytes > 0) {
+            lock();
            MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS,
                              uncache_bytes, false);
            uncache_bytes = 0;
@@ -357,6 +410,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
             }
             cache_bytes += Memory::YUZU_PAGESIZE;
         } else if (cache_bytes > 0) {
+            lock();
             MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
                               true);
             cache_bytes = 0;
@@ -364,10 +418,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
         vpage++;
     }
     if (uncache_bytes > 0) {
+        lock();
         MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
                           false);
     }
     if (cache_bytes > 0) {
+        lock();
         MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
                           true);
     }
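UpdatePagesCachedCount above takes counter_guard lazily: the loop scans the packed counters without holding the mutex and only acquires it the first time a MarkRegionCaching call is actually needed, releasing it once via SCOPE_EXIT. The same pattern in self-contained form (the guarded call is a stand-in; SCOPE_EXIT is replaced by an explicit unlock to keep the sketch dependency-free):

    #include <mutex>

    std::mutex counter_guard;

    void UpdateCounters(bool needs_protected_call) {
        bool locked = false;
        auto lock = [&] {
            if (!locked) {
                counter_guard.lock(); // pay for the lock only on first use
                locked = true;
            }
        };
        // fast path: pure counter arithmetic, no lock taken
        if (needs_protected_call) {
            lock();
            // MarkRegionCaching(...); // stand-in for the protected call
        }
        if (locked) {
            counter_guard.unlock(); // what the patch does via SCOPE_EXIT
        }
    }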
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9687531e8..f1abf4f83 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -23,7 +23,7 @@ public:
 
     ~GPUDirtyMemoryManager() = default;
 
-    void Collect(VAddr address, size_t size) {
+    void Collect(PAddr address, size_t size) {
         TransformAddress t = BuildTransform(address, size);
         TransformAddress tmp, original;
         do {
@@ -47,7 +47,7 @@ public:
                                                  std::memory_order_relaxed));
     }
 
-    void Gather(std::function<void(VAddr, size_t)>& callback) {
+    void Gather(std::function<void(PAddr, size_t)>& callback) {
         {
             std::scoped_lock lk(guard);
             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
@@ -65,7 +65,7 @@ public:
                 mask = mask >> empty_bits;
 
                 const size_t continuous_bits = std::countr_one(mask);
-                callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
+                callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
                          continuous_bits << align_bits);
                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
                 offset += continuous_bits << align_bits;
@@ -89,7 +89,7 @@ private:
     constexpr static size_t align_mask = align_size - 1;
     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
 
-    bool IsValid(VAddr address) {
+    bool IsValid(PAddr address) {
         return address < (1ULL << 39);
     }
 
@@ -103,7 +103,7 @@ private:
         return mask;
     }
 
-    TransformAddress BuildTransform(VAddr address, size_t size) {
+    TransformAddress BuildTransform(PAddr address, size_t size) {
         const size_t minor_address = address & page_mask;
         const size_t minor_bit = minor_address >> align_bits;
         const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
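Gather() above decodes each packed TransformAddress by skipping clean chunks with std::countr_zero and measuring dirty runs with std::countr_one, so one callback covers each contiguous dirty range. The same decode loop in isolation (constants are illustrative; the real chunk granularity comes from align_bits in the header):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    void WalkMask(uint64_t base, uint32_t mask, std::size_t align_bits) {
        std::size_t offset = 0;
        while (mask != 0) {
            const std::size_t empty_bits = std::countr_zero(mask); // skip clean chunks
            offset += empty_bits << align_bits;
            mask >>= empty_bits;
            const std::size_t run = std::countr_one(mask);         // length of dirty run
            std::printf("dirty: addr=0x%llx size=0x%zx\n",
                        static_cast<unsigned long long>(base + offset), run << align_bits);
            mask = run < 32 ? (mask >> run) : 0; // guard: >>32 on u32 is undefined
            offset += run << align_bits;
        }
    }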
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h
new file mode 100644
index 000000000..0b349cc17
--- /dev/null
+++ b/src/core/guest_memory.h
@@ -0,0 +1,218 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <span>
+#include <vector>
+
+#include "common/scratch_buffer.h"
+#include "core/memory.h"
+
+namespace Core::Memory {
+
+enum GuestMemoryFlags : u32 {
+    Read = 1 << 0,
+    Write = 1 << 1,
+    Safe = 1 << 2,
+    Cached = 1 << 3,
+
+    SafeRead = Read | Safe,
+    SafeWrite = Write | Safe,
+    SafeReadWrite = SafeRead | SafeWrite,
+    SafeReadCachedWrite = SafeReadWrite | Cached,
+
+    UnsafeRead = Read,
+    UnsafeWrite = Write,
+    UnsafeReadWrite = UnsafeRead | UnsafeWrite,
+    UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
+};
+
+namespace {
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemory {
+    using iterator = T*;
+    using const_iterator = const T*;
+    using value_type = T;
+    using element_type = T;
+    using iterator_category = std::contiguous_iterator_tag;
+
+public:
+    GuestMemory() = delete;
+    explicit GuestMemory(M& memory, u64 addr, std::size_t size,
+                         Common::ScratchBuffer<T>* backup = nullptr)
+        : m_memory{memory}, m_addr{addr}, m_size{size} {
+        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
+        if constexpr (FLAGS & GuestMemoryFlags::Read) {
+            Read(addr, size, backup);
+        }
+    }
+
+    ~GuestMemory() = default;
+
+    T* data() noexcept {
+        return m_data_span.data();
+    }
+
+    const T* data() const noexcept {
+        return m_data_span.data();
+    }
+
+    size_t size() const noexcept {
+        return m_size;
+    }
+
+    size_t size_bytes() const noexcept {
+        return this->size() * sizeof(T);
+    }
+
+    [[nodiscard]] T* begin() noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] const T* begin() const noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] T* end() noexcept {
+        return this->data() + this->size();
+    }
+
+    [[nodiscard]] const T* end() const noexcept {
+        return this->data() + this->size();
+    }
+
+    T& operator[](size_t index) noexcept {
+        return m_data_span[index];
+    }
+
+    const T& operator[](size_t index) const noexcept {
+        return m_data_span[index];
+    }
+
+    void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
+        m_addr = addr;
+        m_size = size;
+        m_addr_changed = true;
+    }
+
+    std::span<T> Read(u64 addr, std::size_t size,
+                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
+        m_addr = addr;
+        m_size = size;
+        if (m_size == 0) {
+            m_is_data_copy = true;
+            return {};
+        }
+
+        if (this->TrySetSpan()) {
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.FlushRegion(m_addr, this->size_bytes());
+            }
+        } else {
+            if (backup) {
+                backup->resize_destructive(this->size());
+                m_data_span = *backup;
+            } else {
+                m_data_copy.resize(this->size());
+                m_data_span = std::span(m_data_copy);
+            }
+            m_is_data_copy = true;
+            m_span_valid = true;
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
+            } else {
+                m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
+            }
+        }
+        return m_data_span;
+    }
+
+    void Write(std::span<T> write_data) noexcept {
+        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+            m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
+        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+            m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
+        } else {
+            m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
+        }
+    }
+
+    bool TrySetSpan() noexcept {
+        if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
+            m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
+            m_span_valid = true;
+            return true;
+        }
+        return false;
+    }
+
+protected:
+    bool IsDataCopy() const noexcept {
+        return m_is_data_copy;
+    }
+
+    bool AddressChanged() const noexcept {
+        return m_addr_changed;
+    }
+
+    M& m_memory;
+    u64 m_addr{};
+    size_t m_size{};
+    std::span<T> m_data_span{};
+    std::vector<T> m_data_copy{};
+    bool m_span_valid{false};
+    bool m_is_data_copy{false};
+    bool m_addr_changed{false};
+};
+
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
+public:
+    GuestMemoryScoped() = delete;
+    explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
+                               Common::ScratchBuffer<T>* backup = nullptr)
+        : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
+        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
+            if (!this->TrySetSpan()) {
+                if (backup) {
+                    this->m_data_span = *backup;
+                    this->m_span_valid = true;
+                    this->m_is_data_copy = true;
+                }
+            }
+        }
+    }
+
+    ~GuestMemoryScoped() {
+        if constexpr (FLAGS & GuestMemoryFlags::Write) {
+            if (this->size() == 0) [[unlikely]] {
+                return;
+            }
+
+            if (this->AddressChanged() || this->IsDataCopy()) {
+                ASSERT(this->m_span_valid);
+                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+                    this->m_memory.WriteBlockCached(this->m_addr, this->data(),
+                                                    this->size_bytes());
+                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                    this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
+                } else {
+                    this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(),
+                                                    this->size_bytes());
+                }
+            } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
+                                 (FLAGS & GuestMemoryFlags::Cached)) {
+                this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
+            }
+        }
+    }
+};
+} // namespace
+
+template <typename T, GuestMemoryFlags FLAGS>
+using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
+template <typename T, GuestMemoryFlags FLAGS>
+using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
+
+} // namespace Core::Memory
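At a call site, the relocated helpers behave as follows; a hypothetical example (addresses, sizes, and the function itself are made up for illustration; the real call sites are in hle_ipc.cpp below):

    #include "core/guest_memory.h"

    void Example(Core::Memory::Memory& memory, u64 guest_addr) {
        // Read-only view: zero-copy span when the guest range is host-contiguous,
        // otherwise a flushed copy into the internal vector.
        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> in(
            memory, guest_addr, 0x1000);
        const u8 first = in[0];

        // Scoped write-back: the destructor writes back (or invalidates) on exit.
        {
            Core::Memory::CpuGuestMemoryScoped<u32, Core::Memory::GuestMemoryFlags::SafeReadWrite>
                out(memory, guest_addr, 0x10);
            out[0] = first; // mutate through the span; committed when 'out' dies
        }
    }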
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp
index 3f38ceb03..9f6274c7d 100644
--- a/src/core/hle/service/hle_ipc.cpp
+++ b/src/core/hle/service/hle_ipc.cpp
@@ -22,19 +22,7 @@
 #include "core/hle/service/hle_ipc.h"
 #include "core/hle/service/ipc_helpers.h"
 #include "core/memory.h"
-
-namespace {
-static thread_local std::array read_buffer_data_a{
-    Common::ScratchBuffer<u8>(),
-    Common::ScratchBuffer<u8>(),
-    Common::ScratchBuffer<u8>(),
-};
-static thread_local std::array read_buffer_data_x{
-    Common::ScratchBuffer<u8>(),
-    Common::ScratchBuffer<u8>(),
-    Common::ScratchBuffer<u8>(),
-};
-} // Anonymous namespace
+#include "core/guest_memory.h"
 
 namespace Service {
 
@@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) const {
 }
 
 std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_a{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> gm(memory, 0, 0);
 
     ASSERT_OR_EXECUTE_MSG(
         BufferDescriptorA().size() > buffer_index, { return {}; },
         "BufferDescriptorA invalid buffer_index {}", buffer_index);
-    auto& read_buffer = read_buffer_a[buffer_index];
-    return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(),
-                            BufferDescriptorA()[buffer_index].Size(),
-                            &read_buffer_data_a[buffer_index]);
+    return gm.Read(BufferDescriptorA()[buffer_index].Address(),
+                   BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
 }
 
 std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_x{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> gm(memory, 0, 0);
 
     ASSERT_OR_EXECUTE_MSG(
         BufferDescriptorX().size() > buffer_index, { return {}; },
         "BufferDescriptorX invalid buffer_index {}", buffer_index);
-    auto& read_buffer = read_buffer_x[buffer_index];
-    return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(),
-                            BufferDescriptorX()[buffer_index].Size(),
-                            &read_buffer_data_x[buffer_index]);
+    return gm.Read(BufferDescriptorX()[buffer_index].Address(),
+                   BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
 }
 
 std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_a{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
-    static thread_local std::array read_buffer_x{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> gm(memory, 0, 0);
 
     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
                            BufferDescriptorA()[buffer_index].Size()};
@@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
         ASSERT_OR_EXECUTE_MSG(
             BufferDescriptorA().size() > buffer_index, { return {}; },
             "BufferDescriptorA invalid buffer_index {}", buffer_index);
-        auto& read_buffer = read_buffer_a[buffer_index];
-        return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(),
-                                BufferDescriptorA()[buffer_index].Size(),
-                                &read_buffer_data_a[buffer_index]);
+        return gm.Read(BufferDescriptorA()[buffer_index].Address(),
+                       BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
     } else {
         ASSERT_OR_EXECUTE_MSG(
             BufferDescriptorX().size() > buffer_index, { return {}; },
             "BufferDescriptorX invalid buffer_index {}", buffer_index);
-        auto& read_buffer = read_buffer_x[buffer_index];
-        return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(),
-                                BufferDescriptorX()[buffer_index].Size(),
-                                &read_buffer_data_x[buffer_index]);
+        return gm.Read(BufferDescriptorX()[buffer_index].Address(),
+                       BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
     }
 }
 
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h
index d550a11b7..8329d7265 100644
--- a/src/core/hle/service/hle_ipc.h
+++ b/src/core/hle/service/hle_ipc.h
@@ -19,8 +19,6 @@
 #include "core/hle/ipc.h"
-#include "core/hle/kernel/k_auto_object.h"
-#include "core/hle/kernel/k_handle_table.h"
 #include "core/hle/kernel/svc_common.h"
 
 union Result;
 
@@ -377,10 +375,6 @@ public:
         return nullptr;
     }
 
-    Kernel::KScopedAutoObject<Kernel::KAutoObject> GetObjectFromHandle(u32 handle) {
-        return GetClientHandleTable().GetObjectForIpc(handle, thread);
-    }
-
     [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const {
         return manager.lock();
     }
@@ -432,6 +426,9 @@ private:
 
     Kernel::KernelCore& kernel;
     Core::Memory::Memory& memory;
+
+    mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{};
+    mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{};
 };
 
 } // namespace Service
 
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index fd6c9aa0c..7879c6f04 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -2,6 +2,8 @@
 // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 // SPDX-License-Identifier: GPL-3.0-or-later
 
+#include <functional>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -18,6 +20,7 @@ NvMap::Handle::Handle(u64 size_, Id id_)
 }
 
 NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
+    std::scoped_lock lock(mutex);
     // Handles cannot be allocated twice
     if (allocated) {
         return NvResult::AccessDenied;
@@ -78,11 +81,9 @@ void NvMap::UnmapHandle(Handle& handle_description) {
 
     // Free and unmap the handle from the SMMU
     auto& smmu = host1x.MemoryManager();
-    smmu.Unmap(static_cast<DAddr>(handle_description.pin_virt_address),
-               handle_description.aligned_size);
-    smmu.Free(handle_description.pin_virt_address,
-              static_cast<size_t>(handle_description.aligned_size));
-    handle_description.pin_virt_address = 0;
+    smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
+    smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));
+    handle_description.d_address = 0;
 }
 
 bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -123,41 +124,16 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
     }
 }
 
-VAddr NvMap::GetHandleAddress(Handle::Id handle) {
+DAddr NvMap::GetHandleAddress(Handle::Id handle) {
     std::scoped_lock lock(handles_lock);
     try {
-        return handles.at(handle)->address;
+        return handles.at(handle)->d_address;
     } catch (std::out_of_range&) {
         return 0;
     }
 }
 
-NvResult NvMap::AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind,
-                               u64 pAddress, size_t session_id) {
-    auto handle_description{GetHandle(handle)};
-    if (!handle_description) [[unlikely]] {
-        return NvResult::BadParameter;
-    }
-
-    if (handle_description->allocated) [[unlikely]] {
-        return NvResult::InsufficientMemory;
-    }
-
-    std::scoped_lock lock(handle_description->mutex);
-    NvResult result = handle_description->Alloc(pFlags, pAlign, pKind, pAddress);
-    if (result != NvResult::Success) {
-        return result;
-    }
-    auto& smmu = host1x.MemoryManager();
-    size_t total_size = static_cast<size_t>(handle_description->aligned_size);
-    handle_description->d_address = smmu.Allocate(total_size);
-    if (handle_description->d_address == 0) {
-        return NvResult::InsufficientMemory;
-    }
-    smmu.Map(handle_description->d_address, handle_description->address, total_size, session_id);
-    return NvResult::Success;
-}
-
-u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) {
+DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_area_pin) {
     auto handle_description{GetHandle(handle)};
     if (!handle_description) [[unlikely]] {
         return 0;
@@ -176,35 +152,38 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) {
                 handle_description->unmap_queue_entry.reset();
 
                 handle_description->pins++;
-                return handle_description->pin_virt_address;
+                return handle_description->d_address;
             }
         }
 
+        using namespace std::placeholders;
         // If not then allocate some space and map it
        DAddr address{};
        auto& smmu = host1x.MemoryManager();
-        while ((address = smmu.AllocatePinned(
-                    static_cast<size_t>(handle_description->aligned_size))) == 0) {
+        auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
+        //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
+        while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
            // Free handles until the allocation succeeds
            std::scoped_lock queueLock(unmap_queue_lock);
            if (auto freeHandleDesc{unmap_queue.front()}) {
                // Handles in the unmap queue are guaranteed not to be pinned so don't bother
                // checking if they are before unmapping
                std::scoped_lock freeLock(freeHandleDesc->mutex);
-                if (handle_description->pin_virt_address)
+                if (handle_description->d_address)
                    UnmapHandle(*freeHandleDesc);
            } else {
                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
            }
        }
 
+        handle_description->d_address = address;
+
        smmu.Map(address, handle_description->address, handle_description->aligned_size,
                 session_id);
-        handle_description->pin_virt_address = static_cast<u32>(address);
    }
 
    handle_description->pins++;
-    return handle_description->pin_virt_address;
+    return handle_description->d_address;
 }
 
 void NvMap::UnpinHandle(Handle::Id handle) {
@@ -255,15 +234,10 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool internal_session) {
             LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
         } else if (handle_description->dupes == 0) {
             // Force unmap the handle
-            if (handle_description->pin_virt_address) {
+            if (handle_description->d_address) {
                 std::scoped_lock queueLock(unmap_queue_lock);
                 UnmapHandle(*handle_description);
             }
-            if (handle_description->allocated) {
-                auto& smmu = host1x.MemoryManager();
-                smmu.Free(handle_description->d_address, handle_description->aligned_size);
-                smmu.Unmap(handle_description->d_address, handle_description->aligned_size);
-            }
 
             handle_description->pins = 0;
         }
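PinHandle/UnpinHandle above are refcounted: the SMMU allocation and mapping happen on the first pin, repeated pins just bump the count and return the same DAddr, and the final unpin only queues the handle for lazy unmapping. A self-contained toy model of that lifecycle (the unmap queue and the real allocator are elided):

    #include <cstdint>
    #include <optional>

    struct ToyHandle {
        std::int64_t pins = 0;
        std::optional<std::uint64_t> d_address; // device address while mapped
    };

    std::uint64_t Pin(ToyHandle& h) {
        if (!h.d_address) {
            h.d_address = 0x1000; // stand-in for smmu.Allocate + smmu.Map
        }
        ++h.pins;
        return *h.d_address;      // every pin sees the same device address
    }

    void Unpin(ToyHandle& h) {
        if (--h.pins == 0) {
            h.d_address.reset();  // the real code defers this via the unmap queue
        }
    }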
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 7c3110d91..e9e9e8b5b 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -48,7 +48,7 @@ public:
         using Id = u32;
         Id id; //!< A globally unique identifier for this handle
 
-        s32 pins{};
+        s64 pins{};
         u32 pin_virt_address{};
         std::optional<std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
 
@@ -63,15 +63,14 @@ public:
         VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to,
                          //!< this can also be in the nvdrv tmem
-        DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
-                           //!< this can also be in the nvdrv tmem
         bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
                                      //!< call
 
         u8 kind{};        //!< Used for memory compression
         bool allocated{}; //!< If the handle has been allocated with `Alloc`
 
-        u64 dma_map_addr{}; //! remove me after implementing pinning.
+        DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
+                           //!< this can also be in the nvdrv tmem
 
         Handle(u64 size, Id id);
 
@@ -119,15 +118,7 @@ public:
 
     std::shared_ptr<Handle> GetHandle(Handle::Id handle);
 
-    VAddr GetHandleAddress(Handle::Id handle);
-
-    /**
-     * @brief Maps a handle into the SMMU address space
-     * @note This operation is refcounted, the number of calls to this must eventually match the
-     *       number of calls to `UnpinHandle`
-     * @return The SMMU virtual address that the handle has been mapped to
-     */
-    u32 PinHandle(Handle::Id handle, size_t session_id);
+    DAddr GetHandleAddress(Handle::Id handle);
 
     /**
      * @brief Maps a handle into the SMMU address space
      * @note This operation is refcounted, the number of calls to this must eventually match the
      *       number of calls to `UnpinHandle`
      * @return The SMMU virtual address that the handle has been mapped to
      */
-    NvResult AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind,
-                            u64 pAddress, size_t session_id);
+    DAddr PinHandle(Handle::Id handle, size_t session_id, bool low_area_pin);
 
     /**
      * @brief When this has been called an equal number of times to `PinHandle` for the supplied
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 0ff41c6b2..f1404b9da 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -42,7 +42,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
                         u32 height, u32 stride, android::BufferTransformFlags transform,
                         const Common::Rectangle<int>& crop_rect,
                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
-    const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
+    const DAddr addr = nvmap.GetHandleAddress(buffer_handle);
     LOG_TRACE(Service,
               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
               addr, offset, width, height, stride, format);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index c92a7b2f6..8bc10eac2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -40,15 +40,15 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
     case 0x3:
         return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output);
     case 0x5:
-        return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output);
+        return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output, fd);
     case 0x6:
-        return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output);
+        return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output, fd);
     case 0x8:
         return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output);
     case 0x9:
         return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output);
     case 0x14:
-        return WrapVariable(this, &nvhost_as_gpu::Remap, input, output);
+        return WrapVariable(this, &nvhost_as_gpu::Remap, input, output, fd);
     default:
         break;
     }
@@ -86,8 +86,15 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
     return NvResult::NotImplemented;
 }
 
-void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {}
-void nvhost_as_gpu::OnClose(DeviceFD fd) {}
+void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {
+    sessions[fd] = session_id;
+}
+void nvhost_as_gpu::OnClose(DeviceFD fd) {
+    auto it = sessions.find(fd);
+    if (it != sessions.end()) {
+        sessions.erase(it);
+    }
+}
 
 NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
     LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
@@ -206,6 +213,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
                             static_cast<u32>(aligned_size >> page_size_bits));
     }
 
+    nvmap.UnpinHandle(mapping->handle);
+
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
     if (mapping->sparse_alloc) {
@@ -259,7 +268,7 @@ NvResult nvhost_as_gpu::FreeSpace(IoctlFreeSpace& params) {
     return NvResult::Success;
 }
 
-NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) {
+NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) {
     LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size());
 
     if (!vm.initialised) {
@@ -293,19 +302,19 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) {
                 return NvResult::BadValue;
             }
 
-            VAddr cpu_address{static_cast<VAddr>(
-                handle->address +
-                (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
+            DAddr base = nvmap.PinHandle(entry.handle, sessions[fd], false);
+            DAddr device_address{static_cast<DAddr>(
+                base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
 
-            gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind),
-                      use_big_pages);
+            gmmu->Map(virtual_address, device_address, size,
+                      static_cast<Tegra::PTEKind>(entry.kind), use_big_pages);
         }
     }
 
     return NvResult::Success;
 }
 
-NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
+NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
     LOG_DEBUG(Service_NVDRV,
               "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
               ", offset={}",
@@ -331,9 +340,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
         }
 
         u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
-        VAddr cpu_address{mapping->ptr + params.buffer_offset};
+        VAddr device_address{mapping->ptr + params.buffer_offset};
 
-        gmmu->Map(gpu_address, cpu_address, params.mapping_size,
+        gmmu->Map(gpu_address, device_address, params.mapping_size,
                   static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
 
         return NvResult::Success;
@@ -349,7 +358,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
         return NvResult::BadValue;
     }
 
-    VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
+    DAddr device_address{static_cast<DAddr>(nvmap.PinHandle(params.handle, sessions[fd], false) +
+                                            params.buffer_offset)};
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
     bool big_page{[&]() {
@@ -373,15 +383,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
         }
 
         const bool use_big_pages = alloc->second.big_pages && big_page;
-        gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind),
+        gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind),
                   use_big_pages);
 
-        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
-                                               use_big_pages, alloc->second.sparse)};
+        auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
+                                               true, use_big_pages, alloc->second.sparse)};
         alloc->second.mappings.push_back(mapping);
         mapping_map[params.offset] = mapping;
     } else {
-
         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
@@ -394,18 +403,18 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
             return NvResult::InsufficientMemory;
         }
 
-        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size),
+        gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size),
                   static_cast<Tegra::PTEKind>(params.kind), big_page);
 
-        auto mapping{
-            std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
+        auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
+                                               false, big_page, false)};
         mapping_map[params.offset] = mapping;
     }
 
     return NvResult::Success;
 }
 
-NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) {
+NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) {
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
     std::scoped_lock lock(mutex);
@@ -433,6 +442,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) {
             gmmu->Unmap(params.offset, mapping->size);
         }
 
+        nvmap.UnpinHandle(mapping->handle);
+
         mapping_map.erase(params.offset);
     } catch (const std::out_of_range&) {
         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
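OnOpen/OnClose above tie each device file descriptor to the SMMU session it was opened under, so Remap/MapBufferEx/UnmapBuffer can pin nvmap handles against the right process's memory. The bookkeeping, reduced to a sketch (the DeviceFD alias is an assumption made for self-containment):

    #include <cstddef>
    #include <unordered_map>

    using DeviceFD = int; // stand-in for the real alias

    std::unordered_map<DeviceFD, std::size_t> sessions;

    void OnOpen(std::size_t session_id, DeviceFD fd) {
        sessions[fd] = session_id; // remembered for later PinHandle calls
    }

    void OnClose(DeviceFD fd) {
        sessions.erase(fd); // erase(key) is a no-op when absent, matching the
                            // find-then-erase form used in the patch
    }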
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 0dd279f88..4b28f5078 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -141,9 +141,9 @@ private:
     NvResult AllocAsEx(IoctlAllocAsEx& params);
     NvResult AllocateSpace(IoctlAllocSpace& params);
-    NvResult Remap(std::span<IoctlRemapEntry> params);
-    NvResult MapBufferEx(IoctlMapBufferEx& params);
-    NvResult UnmapBuffer(IoctlUnmapBuffer& params);
+    NvResult Remap(std::span<IoctlRemapEntry> params, DeviceFD fd);
+    NvResult MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd);
+    NvResult UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd);
     NvResult FreeSpace(IoctlFreeSpace& params);
     NvResult BindChannel(IoctlBindChannel& params);
 
@@ -159,16 +159,18 @@ private:
     NvCore::NvMap& nvmap;
 
     struct Mapping {
-        VAddr ptr;
+        NvCore::NvMap::Handle::Id handle;
+        DAddr ptr;
         u64 offset;
         u64 size;
         bool fixed;
         bool big_page; // Only valid if fixed == false
         bool sparse_alloc;
 
-        Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
-            : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
-              sparse_alloc(sparse_alloc_) {}
+        Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_,
+                bool big_page_, bool sparse_alloc_)
+            : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_),
+              big_page(big_page_), sparse_alloc(sparse_alloc_) {}
     };
 
     struct Allocation {
@@ -212,9 +214,7 @@ private:
         bool initialised{};
     } vm;
     std::shared_ptr<Tegra::MemoryManager> gmmu;
-
-    // s32 channel{};
-    // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
+    std::unordered_map<DeviceFD, size_t> sessions;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 9ab0ae4d8..78bc5f3c4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -95,6 +95,9 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, DeviceFD fd) {
     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
 
     auto& gpu = system.GPU();
+    //auto& device_memory = system.Host1x().MemoryManager();
+    auto* session = core.GetSession(sessions[fd]);
+
     if (gpu.UseNvdec()) {
         for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
             const SyncptIncr& syncpt_incr = syncpt_increments[i];
@@ -106,7 +109,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, DeviceFD fd) {
         const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
-        system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
-                                             cmdlist.size() * sizeof(u32));
+        session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset,
+                                                cmdlist.data(), cmdlist.size() * sizeof(u32));
         gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
     }
@@ -136,7 +139,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
 NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
                                         DeviceFD fd) {
     const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
     for (size_t i = 0; i < num_entries; i++) {
-        entries[i].map_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd]);
+        DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
+        entries[i].map_address = static_cast<u32>(pin_address);
     }
 
     return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 2b107f009..7765ca1be 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -123,8 +123,8 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) {
         return NvResult::InsufficientMemory;
     }
 
-    const auto result = file.AllocateHandle(params.handle, params.flags, params.align, params.kind,
-                                            params.address, sessions[fd]);
+    const auto result =
+        handle_description->Alloc(params.flags, params.align, params.kind, params.address);
     if (result != NvResult::Success) {
         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
         return result;
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 492ad849a..6e4825313 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -13,8 +13,6 @@
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvdrv/nvdrv_interface.h"
 
-#pragma optimize("", off)
-
 namespace Service::Nvidia {
 
 void NVDRV::Open(HLERequestContext& ctx) {
@@ -173,8 +171,8 @@ void NVDRV::Initialize(HLERequestContext& ctx) {
     [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>();
 
     auto& container = nvdrv->GetContainer();
-    auto process = ctx.GetObjectFromHandle(process_handle);
-    session_id = container.OpenSession(process->DynamicCast<Kernel::KProcess*>());
+    auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle);
+    session_id = container.OpenSession(process.GetPointerUnsafe());
 
     is_initialized = true;
 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8176a41be..609e775ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -24,6 +24,8 @@
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/rasterizer_download_area.h"
 
 namespace Core::Memory {
@@ -638,15 +640,16 @@ struct Memory::Impl {
                   base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
 
         // During boot, current_page_table might not be set yet, in which case we need not flush
-        if (system.IsPoweredOn()) {
+        /*if (system.IsPoweredOn()) {
             auto& gpu = system.GPU();
             for (u64 i = 0; i < size; i++) {
                 const auto page = base + i;
                 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
+
                     gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
                 }
             }
-        }
+        }*/
 
         const auto end = base + size;
         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -811,10 +814,15 @@ struct Memory::Impl {
         return true;
     }
 
-    void HandleRasterizerDownload(VAddr address, size_t size) {
+    void HandleRasterizerDownload(VAddr v_address, size_t size) {
+        const auto* p = GetPointerImpl(
+            v_address, []() {}, []() {});
+        auto& gpu_device_memory = system.Host1x().MemoryManager();
+        DAddr address =
+            gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p));
         const size_t core = system.GetCurrentHostThreadID();
         auto& current_area = rasterizer_read_areas[core];
-        const VAddr end_address = address + size;
+        const DAddr end_address = address + size;
         if (current_area.start_address <= address && end_address <= current_area.end_address)
             [[likely]] {
             return;
@@ -822,7 +830,10 @@ struct Memory::Impl {
         current_area = system.GPU().OnCPURead(address, size);
     }
 
-    void HandleRasterizerWrite(VAddr address, size_t size) {
+    void HandleRasterizerWrite(VAddr v_address, size_t size) {
+        const auto* p = GetPointerImpl(
+            v_address, []() {}, []() {});
+        PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
         const size_t core = std::min(system.GetCurrentHostThreadID(),
                                      sys_core); // any other calls threads go to syscore.
@@ -836,7 +847,7 @@ struct Memory::Impl {
             }
         });
         auto& current_area = rasterizer_write_areas[core];
-        VAddr subaddress = address >> YUZU_PAGEBITS;
+        PAddr subaddress = address >> YUZU_PAGEBITS;
         bool do_collection = current_area.last_address == subaddress;
         if (!do_collection) [[unlikely]] {
             do_collection = system.GPU().OnCPUWrite(address, size);
@@ -849,7 +860,7 @@ struct Memory::Impl {
     }
 
     struct GPUDirtyState {
-        VAddr last_address;
+        PAddr last_address;
     };
 
     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
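HandleRasterizerDownload/HandleRasterizerWrite above now track rasterizer areas in device/physical addresses rather than per-process virtual ones, via a three-step translation. A condensed sketch of that chain, written against the calls the patch itself uses (simplified and not compilable outside memory.cpp; the empty lambdas mirror the patch's no-op fault handlers):

    // VAddr (guest CPU) -> host pointer -> PAddr -> DAddr
    DAddr TranslateForRasterizer(Core::System& system, Memory::Impl& impl, VAddr v_address) {
        const u8* p = impl.GetPointerImpl(
            v_address, []() {}, []() {});               // host backing pointer
        const PAddr paddr =
            system.DeviceMemory().GetRawPhysicalAddr(p); // host pointer -> physical
        return system.Host1x().MemoryManager().GetAddressFromPAddr(paddr); // physical -> device
    }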
diff --git a/src/core/memory.h b/src/core/memory.h
index dddfaf4a4..47ca6a35a 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -498,209 +498,4 @@ private:
     std::unique_ptr<Impl> impl;
 };
 
-enum GuestMemoryFlags : u32 {
-    Read = 1 << 0,
-    Write = 1 << 1,
-    Safe = 1 << 2,
-    Cached = 1 << 3,
-
-    SafeRead = Read | Safe,
-    SafeWrite = Write | Safe,
-    SafeReadWrite = SafeRead | SafeWrite,
-    SafeReadCachedWrite = SafeReadWrite | Cached,
-
-    UnsafeRead = Read,
-    UnsafeWrite = Write,
-    UnsafeReadWrite = UnsafeRead | UnsafeWrite,
-    UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
-};
-
-namespace {
-template <typename M, typename T, GuestMemoryFlags FLAGS>
-class GuestMemory {
-    using iterator = T*;
-    using const_iterator = const T*;
-    using value_type = T;
-    using element_type = T;
-    using iterator_category = std::contiguous_iterator_tag;
-
-public:
-    GuestMemory() = delete;
-    explicit GuestMemory(M& memory, u64 addr, std::size_t size,
-                         Common::ScratchBuffer<T>* backup = nullptr)
-        : m_memory{memory}, m_addr{addr}, m_size{size} {
-        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
-        if constexpr (FLAGS & GuestMemoryFlags::Read) {
-            Read(addr, size, backup);
-        }
-    }
-
-    ~GuestMemory() = default;
-
-    T* data() noexcept {
-        return m_data_span.data();
-    }
-
-    const T* data() const noexcept {
-        return m_data_span.data();
-    }
-
-    size_t size() const noexcept {
-        return m_size;
-    }
-
-    size_t size_bytes() const noexcept {
-        return this->size() * sizeof(T);
-    }
-
-    [[nodiscard]] T* begin() noexcept {
-        return this->data();
-    }
-
-    [[nodiscard]] const T* begin() const noexcept {
-        return this->data();
-    }
-
-    [[nodiscard]] T* end() noexcept {
-        return this->data() + this->size();
-    }
-
-    [[nodiscard]] const T* end() const noexcept {
-        return this->data() + this->size();
-    }
-
-    T& operator[](size_t index) noexcept {
-        return m_data_span[index];
-    }
-
-    const T& operator[](size_t index) const noexcept {
-        return m_data_span[index];
-    }
-
-    void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
-        m_addr = addr;
-        m_size = size;
-        m_addr_changed = true;
-    }
-
-    std::span<T> Read(u64 addr, std::size_t size,
-                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
-        m_addr = addr;
-        m_size = size;
-        if (m_size == 0) {
-            m_is_data_copy = true;
-            return {};
-        }
-
-        if (this->TrySetSpan()) {
-            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                m_memory.FlushRegion(m_addr, this->size_bytes());
-            }
-        } else {
-            if (backup) {
-                backup->resize_destructive(this->size());
-                m_data_span = *backup;
-            } else {
-                m_data_copy.resize(this->size());
-                m_data_span = std::span(m_data_copy);
-            }
-            m_is_data_copy = true;
-            m_span_valid = true;
-            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
-            } else {
-                m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
-            }
-        }
-        return m_data_span;
-    }
-
-    void Write(std::span<T> write_data) noexcept {
-        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
-            m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
-        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-            m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
-        } else {
-            m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
-        }
-    }
-
-    bool TrySetSpan() noexcept {
-        if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
-            m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
-            m_span_valid = true;
-            return true;
-        }
-        return false;
-    }
-
-protected:
-    bool IsDataCopy() const noexcept {
-        return m_is_data_copy;
-    }
-
-    bool AddressChanged() const noexcept {
-        return m_addr_changed;
-    }
-
-    M& m_memory;
-    u64 m_addr{};
-    size_t m_size{};
-    std::span<T> m_data_span{};
-    std::vector<T> m_data_copy{};
-    bool m_span_valid{false};
-    bool m_is_data_copy{false};
-    bool m_addr_changed{false};
-};
-
-template <typename M, typename T, GuestMemoryFlags FLAGS>
-class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
-public:
-    GuestMemoryScoped() = delete;
-    explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
-                               Common::ScratchBuffer<T>* backup = nullptr)
-        : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
-        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
-            if (!this->TrySetSpan()) {
-                if (backup) {
-                    this->m_data_span = *backup;
-                    this->m_span_valid = true;
-                    this->m_is_data_copy = true;
-                }
-            }
-        }
-    }
-
-    ~GuestMemoryScoped() {
-        if constexpr (FLAGS & GuestMemoryFlags::Write) {
-            if (this->size() == 0) [[unlikely]] {
-                return;
-            }
-
-            if (this->AddressChanged() || this->IsDataCopy()) {
-                ASSERT(this->m_span_valid);
-                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
-                    this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
-                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                    this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
-                } else {
-                    this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
-                }
-            } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
-                                 (FLAGS & GuestMemoryFlags::Cached)) {
-                this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
-            }
-        }
-    }
-};
-} // namespace
-
-template <typename T, GuestMemoryFlags FLAGS>
-using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
-template <typename T, GuestMemoryFlags FLAGS>
-using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
-template <typename T, GuestMemoryFlags FLAGS>
-using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
-template <typename T, GuestMemoryFlags FLAGS>
-using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
 } // namespace Core::Memory
-- 