From 813b2ef2530433c95abca97254c5b614f3d8c615 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 11 Mar 2022 17:14:17 -0800 Subject: core: hle: kernel: k_process: Implement thread local storage accurately. --- src/core/hle/kernel/k_process.cpp | 188 ++++++++++++++++++-------------------- src/core/hle/kernel/k_process.h | 18 ++-- src/core/hle/kernel/k_thread.cpp | 4 +- 3 files changed, 99 insertions(+), 111 deletions(-) diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 9233261cd..60dc9a048 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -70,58 +70,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority } } // Anonymous namespace -// Represents a page used for thread-local storage. -// -// Each TLS page contains slots that may be used by processes and threads. -// Every process and thread is created with a slot in some arbitrary page -// (whichever page happens to have an available slot). -class TLSPage { -public: - static constexpr std::size_t num_slot_entries = - Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE; - - explicit TLSPage(VAddr address) : base_address{address} {} - - bool HasAvailableSlots() const { - return !is_slot_used.all(); - } - - VAddr GetBaseAddress() const { - return base_address; - } - - std::optional ReserveSlot() { - for (std::size_t i = 0; i < is_slot_used.size(); i++) { - if (is_slot_used[i]) { - continue; - } - - is_slot_used[i] = true; - return base_address + (i * Core::Memory::TLS_ENTRY_SIZE); - } - - return std::nullopt; - } - - void ReleaseSlot(VAddr address) { - // Ensure that all given addresses are consistent with how TLS pages - // are intended to be used when releasing slots. - ASSERT(IsWithinPage(address)); - ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0); - - const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE; - is_slot_used[index] = false; - } - -private: - bool IsWithinPage(VAddr address) const { - return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE; - } - - VAddr base_address; - std::bitset is_slot_used; -}; - ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name, ProcessType type, KResourceLimit* res_limit) { auto& kernel = system.Kernel(); @@ -404,7 +352,7 @@ ResultCode KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, } // Create TLS region - tls_region_address = CreateTLSRegion(); + R_TRY(this->CreateThreadLocalRegion(std::addressof(tls_region_address))); memory_reservation.Commit(); return handle_table.Initialize(capabilities.GetHandleTableSize()); @@ -444,7 +392,7 @@ void KProcess::PrepareForTermination() { stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList()); - FreeTLSRegion(tls_region_address); + this->DeleteThreadLocalRegion(tls_region_address); tls_region_address = 0; if (resource_limit) { @@ -487,63 +435,103 @@ void KProcess::Finalize() { KAutoObjectWithSlabHeapAndContainer::Finalize(); } -/** - * Attempts to find a TLS page that contains a free slot for - * use by a thread. - * - * @returns If a page with an available slot is found, then an iterator - * pointing to the page is returned. Otherwise the end iterator - * is returned instead. - */ -static auto FindTLSPageWithAvailableSlots(std::vector& tls_pages) { - return std::find_if(tls_pages.begin(), tls_pages.end(), - [](const auto& page) { return page.HasAvailableSlots(); }); -} +ResultCode KProcess::CreateThreadLocalRegion(VAddr* out) { + KThreadLocalPage* tlp = nullptr; + VAddr tlr = 0; -VAddr KProcess::CreateTLSRegion() { - KScopedSchedulerLock lock(kernel); - if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; - tls_page_iter != tls_pages.cend()) { - return *tls_page_iter->ReserveSlot(); - } + // See if we can get a region from a partially used TLP. + { + KScopedSchedulerLock sl{kernel}; + + if (auto it = partially_used_tlp_tree.begin(); it != partially_used_tlp_tree.end()) { + tlr = it->Reserve(); + ASSERT(tlr != 0); + + if (it->IsAllUsed()) { + tlp = std::addressof(*it); + partially_used_tlp_tree.erase(it); + fully_used_tlp_tree.insert(*tlp); + } - Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; - ASSERT(tls_page_ptr); + *out = tlr; + return ResultSuccess; + } + } - const VAddr start{page_table->GetKernelMapRegionStart()}; - const VAddr size{page_table->GetKernelMapRegionEnd() - start}; - const PAddr tls_map_addr{kernel.System().DeviceMemory().GetPhysicalAddr(tls_page_ptr)}; - const VAddr tls_page_addr{page_table - ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize, - KMemoryState::ThreadLocal, - KMemoryPermission::UserReadWrite, - tls_map_addr) - .ValueOr(0)}; + // Allocate a new page. + tlp = KThreadLocalPage::Allocate(kernel); + R_UNLESS(tlp != nullptr, ResultOutOfMemory); + auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(kernel, tlp); }); - ASSERT(tls_page_addr); + // Initialize the new page. + R_TRY(tlp->Initialize(kernel, this)); - std::memset(tls_page_ptr, 0, PageSize); - tls_pages.emplace_back(tls_page_addr); + // Reserve a TLR. + tlr = tlp->Reserve(); + ASSERT(tlr != 0); - const auto reserve_result{tls_pages.back().ReserveSlot()}; - ASSERT(reserve_result.has_value()); + // Insert into our tree. + { + KScopedSchedulerLock sl{kernel}; + if (tlp->IsAllUsed()) { + fully_used_tlp_tree.insert(*tlp); + } else { + partially_used_tlp_tree.insert(*tlp); + } + } - return *reserve_result; + // We succeeded! + tlp_guard.Cancel(); + *out = tlr; + return ResultSuccess; } -void KProcess::FreeTLSRegion(VAddr tls_address) { - KScopedSchedulerLock lock(kernel); - const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); - auto iter = - std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { - return page.GetBaseAddress() == aligned_address; - }); +ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) { + KThreadLocalPage* page_to_free = nullptr; + + // Release the region. + { + KScopedSchedulerLock sl{kernel}; + + // Try to find the page in the partially used list. + auto it = partially_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize)); + if (it == partially_used_tlp_tree.end()) { + // If we don't find it, it has to be in the fully used list. + it = fully_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize)); + R_UNLESS(it != fully_used_tlp_tree.end(), ResultInvalidAddress); + + // Release the region. + it->Release(addr); + + // Move the page out of the fully used list. + KThreadLocalPage* tlp = std::addressof(*it); + fully_used_tlp_tree.erase(it); + if (tlp->IsAllFree()) { + page_to_free = tlp; + } else { + partially_used_tlp_tree.insert(*tlp); + } + } else { + // Release the region. + it->Release(addr); + + // Handle the all-free case. + KThreadLocalPage* tlp = std::addressof(*it); + if (tlp->IsAllFree()) { + partially_used_tlp_tree.erase(it); + page_to_free = tlp; + } + } + } + + // If we should free the page it was in, do so. + if (page_to_free != nullptr) { + page_to_free->Finalize(); - // Something has gone very wrong if we're freeing a region - // with no actual page available. - ASSERT(iter != tls_pages.cend()); + KThreadLocalPage::Free(kernel, page_to_free); + } - iter->ReleaseSlot(tls_address); + return ResultSuccess; } void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) { diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index cf1b67428..5ed0f2d83 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -15,6 +15,7 @@ #include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_synchronization_object.h" +#include "core/hle/kernel/k_thread_local_page.h" #include "core/hle/kernel/k_worker_task.h" #include "core/hle/kernel/process_capability.h" #include "core/hle/kernel/slab_helpers.h" @@ -362,10 +363,10 @@ public: // Thread-local storage management // Marks the next available region as used and returns the address of the slot. - [[nodiscard]] VAddr CreateTLSRegion(); + [[nodiscard]] ResultCode CreateThreadLocalRegion(VAddr* out); // Frees a used TLS slot identified by the given address - void FreeTLSRegion(VAddr tls_address); + ResultCode DeleteThreadLocalRegion(VAddr addr); private: void PinThread(s32 core_id, KThread* thread) { @@ -413,13 +414,6 @@ private: /// The ideal CPU core for this process, threads are scheduled on this core by default. u8 ideal_core = 0; - /// The Thread Local Storage area is allocated as processes create threads, - /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part - /// holds the TLS for a specific thread. This vector contains which parts are in use for each - /// page as a bitmask. - /// This vector will grow as more pages are allocated for new threads. - std::vector tls_pages; - /// Contains the parsed process capability descriptors. ProcessCapabilities capabilities; @@ -482,6 +476,12 @@ private: KThread* exception_thread{}; KLightLock state_lock; + + using TLPTree = + Common::IntrusiveRedBlackTreeBaseTraits::TreeType; + using TLPIterator = TLPTree::iterator; + TLPTree fully_used_tlp_tree; + TLPTree partially_used_tlp_tree; }; } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 736f39e91..ba7f72c6b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -210,7 +210,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s if (owner != nullptr) { // Setup the TLS, if needed. if (type == ThreadType::User) { - tls_address = owner->CreateTLSRegion(); + R_TRY(owner->CreateThreadLocalRegion(std::addressof(tls_address))); } parent = owner; @@ -305,7 +305,7 @@ void KThread::Finalize() { // If the thread has a local region, delete it. if (tls_address != 0) { - parent->FreeTLSRegion(tls_address); + ASSERT(parent->DeleteThreadLocalRegion(tls_address).IsSuccess()); } // Release any waiters. -- cgit v1.2.3