diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_64.cpp | 12 | ||||
-rw-r--r-- | src/core/device_memory.cpp | 2 | ||||
-rw-r--r-- | src/core/device_memory.h | 17 | ||||
-rw-r--r-- | src/core/hle/kernel/k_light_condition_variable.h | 43 | ||||
-rw-r--r-- | src/core/hle/kernel/k_light_lock.cpp | 19 | ||||
-rw-r--r-- | src/core/hle/kernel/k_process.cpp | 16 | ||||
-rw-r--r-- | src/core/hle/kernel/k_resource_limit.cpp | 2 | ||||
-rw-r--r-- | src/core/memory.cpp | 18 |
9 files changed, 88 insertions, 47 deletions
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index cea7f0fb1..c8f6dc765 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -128,6 +128,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (page_table) { config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( page_table->pointers.data()); + config.fastmem_pointer = page_table->fastmem_arena; } config.absolute_offset_page_table = true; config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; @@ -143,7 +144,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* // Code cache size config.code_cache_size = 512 * 1024 * 1024; - config.far_code_offset = 256 * 1024 * 1024; + config.far_code_offset = 400 * 1024 * 1024; // Safe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { @@ -171,6 +172,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_reduce_misalign_checks) { config.only_detect_misalignment_via_page_table_on_page_boundary = false; } + if (!Settings::values.cpuopt_fastmem) { + config.fastmem_pointer = nullptr; + } } // Unsafe optimizations diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 63193dcb1..ba524cd05 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -160,6 +160,10 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.absolute_offset_page_table = true; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + + config.fastmem_pointer = page_table->fastmem_arena; + config.fastmem_address_space_bits = address_space_bits; + config.silently_mirror_fastmem = false; } // Multi-process state @@ -181,7 +185,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* // Code cache size config.code_cache_size = 512 * 1024 * 1024; - config.far_code_offset = 256 * 1024 * 1024; + config.far_code_offset = 400 * 1024 * 1024; // Safe optimizations if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { @@ -209,6 +213,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_reduce_misalign_checks) { config.only_detect_misalignment_via_page_table_on_page_boundary = false; } + if (!Settings::values.cpuopt_fastmem) { + config.fastmem_pointer = nullptr; + } } // Unsafe optimizations @@ -223,6 +230,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) { + config.fastmem_address_space_bits = 64; + } } return std::make_shared<Dynarmic::A64::Jit>(config); diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp index 0c4b440ed..f19c0515f 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp @@ -6,7 +6,7 @@ namespace Core { -DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size} {} +DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {} DeviceMemory::~DeviceMemory() = default; } // namespace Core diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 5b1ae28f3..c4d17705f 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h @@ -5,7 +5,7 @@ #pragma once #include "common/common_types.h" -#include "common/virtual_buffer.h" +#include "common/host_memory.h" namespace Core { @@ -21,27 +21,30 @@ enum : u64 { }; }; // namespace DramMemoryMap -class DeviceMemory : NonCopyable { +class DeviceMemory { public: explicit DeviceMemory(); ~DeviceMemory(); + DeviceMemory& operator=(const DeviceMemory&) = delete; + DeviceMemory(const DeviceMemory&) = delete; + template <typename T> PAddr GetPhysicalAddr(const T* ptr) const { - return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) + + return (reinterpret_cast<uintptr_t>(ptr) - + reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())) + DramMemoryMap::Base; } u8* GetPointer(PAddr addr) { - return buffer.data() + (addr - DramMemoryMap::Base); + return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); } const u8* GetPointer(PAddr addr) const { - return buffer.data() + (addr - DramMemoryMap::Base); + return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); } -private: - Common::VirtualBuffer<u8> buffer; + Common::HostMemory buffer; }; } // namespace Core diff --git a/src/core/hle/kernel/k_light_condition_variable.h b/src/core/hle/kernel/k_light_condition_variable.h index ca2e539a7..a95fa41f3 100644 --- a/src/core/hle/kernel/k_light_condition_variable.h +++ b/src/core/hle/kernel/k_light_condition_variable.h @@ -18,41 +18,58 @@ class KernelCore; class KLightConditionVariable { public: - explicit KLightConditionVariable(KernelCore& kernel_) - : thread_queue(kernel_), kernel(kernel_) {} + explicit KLightConditionVariable(KernelCore& kernel_) : kernel{kernel_} {} - void Wait(KLightLock* lock, s64 timeout = -1) { - WaitImpl(lock, timeout); - lock->Lock(); + void Wait(KLightLock* lock, s64 timeout = -1, bool allow_terminating_thread = true) { + WaitImpl(lock, timeout, allow_terminating_thread); } void Broadcast() { KScopedSchedulerLock lk{kernel}; - while (thread_queue.WakeupFrontThread() != nullptr) { - // We want to signal all threads, and so should continue waking up until there's nothing - // to wake. + + // Signal all threads. + for (auto& thread : wait_list) { + thread.SetState(ThreadState::Runnable); } } private: - void WaitImpl(KLightLock* lock, s64 timeout) { + void WaitImpl(KLightLock* lock, s64 timeout, bool allow_terminating_thread) { KThread* owner = GetCurrentThreadPointer(kernel); // Sleep the thread. { - KScopedSchedulerLockAndSleep lk(kernel, owner, timeout); - lock->Unlock(); + KScopedSchedulerLockAndSleep lk{kernel, owner, timeout}; - if (!thread_queue.SleepThread(owner)) { + if (!allow_terminating_thread && owner->IsTerminationRequested()) { lk.CancelSleep(); return; } + + lock->Unlock(); + + // Set the thread as waiting. + GetCurrentThread(kernel).SetState(ThreadState::Waiting); + + // Add the thread to the queue. + wait_list.push_back(GetCurrentThread(kernel)); + } + + // Remove the thread from the wait list. + { + KScopedSchedulerLock sl{kernel}; + + wait_list.erase(wait_list.iterator_to(GetCurrentThread(kernel))); } // Cancel the task that the sleep setup. kernel.TimeManager().UnscheduleTimeEvent(owner); + + // Re-acquire the lock. + lock->Lock(); } - KThreadQueue thread_queue; + KernelCore& kernel; + KThread::WaiterList wait_list{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_light_lock.cpp b/src/core/hle/kernel/k_light_lock.cpp index f974022e8..0896e705f 100644 --- a/src/core/hle/kernel/k_light_lock.cpp +++ b/src/core/hle/kernel/k_light_lock.cpp @@ -59,11 +59,7 @@ void KLightLock::LockSlowPath(uintptr_t _owner, uintptr_t _cur_thread) { owner_thread->AddWaiter(cur_thread); // Set thread states. - if (cur_thread->GetState() == ThreadState::Runnable) { - cur_thread->SetState(ThreadState::Waiting); - } else { - KScheduler::SetSchedulerUpdateNeeded(kernel); - } + cur_thread->SetState(ThreadState::Waiting); if (owner_thread->IsSuspended()) { owner_thread->ContinueIfHasKernelWaiters(); @@ -73,10 +69,9 @@ void KLightLock::LockSlowPath(uintptr_t _owner, uintptr_t _cur_thread) { // We're no longer waiting on the lock owner. { KScopedSchedulerLock sl{kernel}; - KThread* owner_thread = cur_thread->GetLockOwner(); - if (owner_thread) { + + if (KThread* owner_thread = cur_thread->GetLockOwner(); owner_thread != nullptr) { owner_thread->RemoveWaiter(cur_thread); - KScheduler::SetSchedulerUpdateNeeded(kernel); } } } @@ -95,17 +90,13 @@ void KLightLock::UnlockSlowPath(uintptr_t _cur_thread) { // Pass the lock to the next owner. uintptr_t next_tag = 0; - if (next_owner) { + if (next_owner != nullptr) { next_tag = reinterpret_cast<uintptr_t>(next_owner); if (num_waiters > 1) { next_tag |= 0x1; } - if (next_owner->GetState() == ThreadState::Waiting) { - next_owner->SetState(ThreadState::Runnable); - } else { - KScheduler::SetSchedulerUpdateNeeded(kernel); - } + next_owner->SetState(ThreadState::Runnable); if (next_owner->IsSuspended()) { next_owner->ContinueIfHasKernelWaiters(); diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 06b8ce151..d1bd98051 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -201,17 +201,15 @@ bool KProcess::ReleaseUserException(KThread* thread) { // Remove waiter thread. s32 num_waiters{}; - KThread* next = thread->RemoveWaiterByKey( - std::addressof(num_waiters), - reinterpret_cast<uintptr_t>(std::addressof(exception_thread))); - if (next != nullptr) { - if (next->GetState() == ThreadState::Waiting) { - next->SetState(ThreadState::Runnable); - } else { - KScheduler::SetSchedulerUpdateNeeded(kernel); - } + if (KThread* next = thread->RemoveWaiterByKey( + std::addressof(num_waiters), + reinterpret_cast<uintptr_t>(std::addressof(exception_thread))); + next != nullptr) { + next->SetState(ThreadState::Runnable); } + KScheduler::SetSchedulerUpdateNeeded(kernel); + return true; } else { return false; diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp index f91cb65dc..da88f35bc 100644 --- a/src/core/hle/kernel/k_resource_limit.cpp +++ b/src/core/hle/kernel/k_resource_limit.cpp @@ -117,7 +117,7 @@ bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) { if (current_hints[index] + value <= limit_values[index] && (timeout < 0 || core_timing->GetGlobalTimeNs().count() < timeout)) { waiter_count++; - cond_var.Wait(&lock, timeout); + cond_var.Wait(&lock, timeout, false); waiter_count--; } else { break; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9857278f6..f285c6f63 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -12,6 +12,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/page_table.h" +#include "common/settings.h" #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" @@ -32,6 +33,7 @@ struct Memory::Impl { void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { current_page_table = &process.PageTable().PageTableImpl(); + current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); @@ -41,13 +43,23 @@ struct Memory::Impl { void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); + ASSERT_MSG(target >= DramMemoryMap::Base && target < DramMemoryMap::End, + "Out of bounds target: {:016X}", target); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); + + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size); + } } void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); + + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Unmap(base, size); + } } bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { @@ -466,6 +478,12 @@ struct Memory::Impl { if (vaddr == 0) { return; } + + if (Settings::IsFastmemEnabled()) { + const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; + system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); + } + // Iterate over a contiguous CPU address space, which corresponds to the specified GPU // address space, marking the region as un/cached. The region is marked un/cached at a // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size |