From 8634b8cb83755b6c6554faa11c0e488d2ad21f90 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 3 Dec 2016 22:38:14 -0500 Subject: Threading: Reworked the way our scheduler works. Threads will now be awakened when the objects they're waiting on are signaled, instead of repeating the WaitSynchronization call every now and then. The scheduler is now called once after every SVC call, and once after a thread is awakened from sleep by its timeout callback. This new implementation is based off reverse-engineering of the real kernel. See https://gist.github.com/Subv/02f29bd9f1e5deb7aceea1e8f019c8f4 for a more detailed description of how the real kernel handles rescheduling. --- src/core/hle/kernel/address_arbiter.cpp | 2 - src/core/hle/kernel/kernel.cpp | 59 ++++++++++- src/core/hle/kernel/kernel.h | 3 + src/core/hle/kernel/thread.cpp | 97 +---------------- src/core/hle/kernel/thread.h | 22 ++-- src/core/hle/kernel/timer.cpp | 4 - src/core/hle/svc.cpp | 181 ++++++++++++++++++-------------- 7 files changed, 179 insertions(+), 189 deletions(-) (limited to 'src/core/hle') diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 37eec4c84..b5a0cc3a3 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -79,8 +79,6 @@ ResultCode AddressArbiter::ArbitrateAddress(ArbitrationType type, VAddr address, ErrorSummary::WrongArgument, ErrorLevel::Usage); } - HLE::Reschedule(__func__); - // The calls that use a timeout seem to always return a Timeout error even if they did not put // the thread to sleep if (type == ArbitrationType::WaitIfLessThanWithTimeout || diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 0c8752670..be7a5a6d8 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -31,13 +31,62 @@ void WaitObject::RemoveWaitingThread(Thread* thread) { waiting_threads.erase(itr); } -void WaitObject::WakeupAllWaitingThreads() { - for 
(auto thread : waiting_threads) - thread->ResumeFromWait(); +SharedPtr WaitObject::GetHighestPriorityReadyThread() { + // Remove the threads that are ready or already running from our waitlist + waiting_threads.erase(std::remove_if(waiting_threads.begin(), waiting_threads.end(), [](SharedPtr thread) -> bool { + return thread->status == THREADSTATUS_RUNNING || thread->status == THREADSTATUS_READY; + }), waiting_threads.end()); + + if (waiting_threads.empty()) + return nullptr; - waiting_threads.clear(); + auto candidate_threads = waiting_threads; - HLE::Reschedule(__func__); + // Eliminate all threads that are waiting on more than one object, and not all of them are ready + candidate_threads.erase(std::remove_if(candidate_threads.begin(), candidate_threads.end(), [](SharedPtr thread) -> bool { + for (auto object : thread->wait_objects) + if (object->ShouldWait()) + return true; + return false; + }), candidate_threads.end()); + + // Return the thread with the lowest priority value (The one with the highest priority) + auto thread_itr = std::min_element(candidate_threads.begin(), candidate_threads.end(), [](const SharedPtr& lhs, const SharedPtr& rhs) { + return lhs->current_priority < rhs->current_priority; + }); + + if (thread_itr == candidate_threads.end()) + return nullptr; + + return *thread_itr; +} + +void WaitObject::WakeupAllWaitingThreads() { + // Wake up all threads that can be awoken, in priority order + while (auto thread = GetHighestPriorityReadyThread()) { + if (thread->wait_objects.empty()) { + Acquire(); + // Set the output index of the WaitSynchronizationN call to the index of this object. 
+ if (thread->wait_set_output) { + thread->SetWaitSynchronizationOutput(thread->GetWaitObjectIndex(this)); + thread->wait_set_output = false; + } + } else { + for (auto object : thread->wait_objects) { + object->Acquire(); + // Remove the thread from the object's waitlist + object->RemoveWaitingThread(thread.get()); + } + // Note: This case doesn't update the output index of WaitSynchronizationN. + // Clear the thread's waitlist + thread->wait_objects.clear(); + } + + // Set the result of the call to WaitSynchronization to RESULT_SUCCESS + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); + thread->ResumeFromWait(); + // Note: Removing the thread from the object's waitlist will be done by GetHighestPriorityReadyThread + } } const std::vector>& WaitObject::GetWaitingThreads() const { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 231cf7b75..eb5a3bf7e 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -155,6 +155,9 @@ public: /// Wake up all threads waiting on this object void WakeupAllWaitingThreads(); + /// Obtains the highest priority thread that is ready to run from this object's waiting list. + SharedPtr GetHighestPriorityReadyThread(); + /// Get a const reference to the waiting threads list for debug use const std::vector>& GetWaitingThreads() const; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 84d6d24c6..49ed9d899 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -120,8 +120,6 @@ void Thread::Stop() { u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot); - - HLE::Reschedule(__func__); } Thread* ArbitrateHighestPriorityThread(u32 address) { @@ -180,50 +178,6 @@ static void PriorityBoostStarvedThreads() { } } -/** - * Gets the registers for timeout parameter of the next WaitSynchronization call. 
- * @param thread a pointer to the thread that is ready to call WaitSynchronization - * @returns a tuple of two register pointers to low and high part of the timeout parameter - */ -static std::tuple GetWaitSynchTimeoutParameterRegister(Thread* thread) { - bool thumb_mode = (thread->context.cpsr & TBIT) != 0; - u16 thumb_inst = Memory::Read16(thread->context.pc & 0xFFFFFFFE); - u32 inst = Memory::Read32(thread->context.pc & 0xFFFFFFFC) & 0x0FFFFFFF; - - if ((thumb_mode && thumb_inst == 0xDF24) || (!thumb_mode && inst == 0x0F000024)) { - // svc #0x24 (WaitSynchronization1) - return std::make_tuple(&thread->context.cpu_registers[2], - &thread->context.cpu_registers[3]); - } else if ((thumb_mode && thumb_inst == 0xDF25) || (!thumb_mode && inst == 0x0F000025)) { - // svc #0x25 (WaitSynchronizationN) - return std::make_tuple(&thread->context.cpu_registers[0], - &thread->context.cpu_registers[4]); - } - - UNREACHABLE(); -} - -/** - * Updates the WaitSynchronization timeout parameter according to the difference - * between ticks of the last WaitSynchronization call and the incoming one. 
- * @param timeout_low a pointer to the register for the low part of the timeout parameter - * @param timeout_high a pointer to the register for the high part of the timeout parameter - * @param last_tick tick of the last WaitSynchronization call - */ -static void UpdateTimeoutParameter(u32* timeout_low, u32* timeout_high, u64 last_tick) { - s64 timeout = ((s64)*timeout_high << 32) | *timeout_low; - - if (timeout != -1) { - timeout -= cyclesToUs(CoreTiming::GetTicks() - last_tick) * 1000; // in nanoseconds - - if (timeout < 0) - timeout = 0; - - *timeout_low = timeout & 0xFFFFFFFF; - *timeout_high = timeout >> 32; - } -} - /** * Switches the CPU's active thread context to that of the specified thread * @param new_thread The thread to switch to @@ -254,32 +208,6 @@ static void SwitchContext(Thread* new_thread) { current_thread = new_thread; - // If the thread was waited by a svcWaitSynch call, step back PC by one instruction to rerun - // the SVC when the thread wakes up. This is necessary to ensure that the thread can acquire - // the requested wait object(s) before continuing. - if (new_thread->waitsynch_waited) { - // CPSR flag indicates CPU mode - bool thumb_mode = (new_thread->context.cpsr & TBIT) != 0; - - // SVC instruction is 2 bytes for THUMB, 4 bytes for ARM - new_thread->context.pc -= thumb_mode ? 
2 : 4; - - // Get the register for timeout parameter - u32 *timeout_low, *timeout_high; - std::tie(timeout_low, timeout_high) = GetWaitSynchTimeoutParameterRegister(new_thread); - - // Update the timeout parameter - UpdateTimeoutParameter(timeout_low, timeout_high, new_thread->last_running_ticks); - } - - // Clean up the thread's wait_objects, they'll be restored if needed during - // the svcWaitSynchronization call - for (size_t i = 0; i < new_thread->wait_objects.size(); ++i) { - SharedPtr object = new_thread->wait_objects[i]; - object->RemoveWaitingThread(new_thread); - } - new_thread->wait_objects.clear(); - ready_queue.remove(new_thread->current_priority, new_thread); new_thread->status = THREADSTATUS_RUNNING; @@ -319,17 +247,13 @@ static Thread* PopNextReadyThread() { void WaitCurrentThread_Sleep() { Thread* thread = GetCurrentThread(); thread->status = THREADSTATUS_WAIT_SLEEP; - - HLE::Reschedule(__func__); } void WaitCurrentThread_WaitSynchronization(std::vector> wait_objects, - bool wait_set_output, bool wait_all) { + bool wait_set_output) { Thread* thread = GetCurrentThread(); thread->wait_set_output = wait_set_output; - thread->wait_all = wait_all; thread->wait_objects = std::move(wait_objects); - thread->waitsynch_waited = true; thread->status = THREADSTATUS_WAIT_SYNCH; } @@ -351,15 +275,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { return; } - thread->waitsynch_waited = false; - if (thread->status == THREADSTATUS_WAIT_SYNCH || thread->status == THREADSTATUS_WAIT_ARB) { + thread->wait_set_output = false; thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, ErrorSummary::StatusChanged, ErrorLevel::Info)); - - if (thread->wait_set_output) - thread->SetWaitSynchronizationOutput(-1); } thread->ResumeFromWait(); @@ -399,6 +319,7 @@ void Thread::ResumeFromWait() { ready_queue.push_back(current_priority, this); status = THREADSTATUS_READY; + HLE::Reschedule(__func__); } /** @@ -494,13 
+415,11 @@ ResultVal> Thread::Create(std::string name, VAddr entry_point, thread->last_running_ticks = CoreTiming::GetTicks(); thread->processor_id = processor_id; thread->wait_set_output = false; - thread->wait_all = false; thread->wait_objects.clear(); thread->wait_address = 0; thread->name = std::move(name); thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); thread->owner_process = g_current_process; - thread->waitsynch_waited = false; // Find the next available TLS index, and mark it as used auto& tls_slots = Kernel::g_current_process->tls_slots; @@ -555,8 +474,6 @@ ResultVal> Thread::Create(std::string name, VAddr entry_point, ready_queue.push_back(thread->current_priority, thread.get()); thread->status = THREADSTATUS_READY; - HLE::Reschedule(__func__); - return MakeResult>(std::move(thread)); } @@ -619,14 +536,6 @@ void Reschedule() { HLE::DoneRescheduling(); - // Don't bother switching to the same thread. - // But if the thread was waiting on objects, we still need to switch it - // to perform PC modification, change state to RUNNING, etc. - // This occurs in the case when an object the thread is waiting on immediately wakes up - // the current thread before Reschedule() is called. - if (next == cur && (next == nullptr || next->waitsynch_waited == false)) - return; - if (cur && next) { LOG_TRACE(Kernel, "context switch %u -> %u", cur->GetObjectId(), next->GetObjectId()); } else if (cur) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index e0ffcea8a..63b97b74f 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -124,6 +125,16 @@ public: */ void SetWaitSynchronizationOutput(s32 output); + /** + * Retrieves the index that this particular object occupies in the list of objects + * that the thread passed to WaitSynchronizationN. 
+ * It is used to set the output value of WaitSynchronizationN when the thread is awakened. + * @param object Object to query the index of. + */ + s32 GetWaitObjectIndex(WaitObject* object) { + return wait_objects_index[object->GetObjectId()]; + } + /** * Stops a thread, invalidating it from further use */ @@ -154,16 +165,16 @@ public: VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread - bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait - /// Mutexes currently held by this thread, which will be released when it exits. boost::container::flat_set> held_mutexes; SharedPtr owner_process; ///< Process that owns this thread std::vector> wait_objects; ///< Objects that the thread is waiting on + std::unordered_map wait_objects_index; ///< Mapping of Object ids to their position in the last waitlist that this thread waited on. + VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address - bool wait_all; ///< True if the thread is waiting on all objects before resuming - bool wait_set_output; ///< True if the output parameter should be set on thread wakeup + + bool wait_set_output; ///< True if the WaitSynchronizationN output parameter should be set on thread wakeup std::string name; @@ -215,10 +226,9 @@ void WaitCurrentThread_Sleep(); * @param wait_objects Kernel objects that we are waiting on * @param wait_set_output If true, set the output parameter on thread wakeup (for * WaitSynchronizationN only) - * @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only) */ void WaitCurrentThread_WaitSynchronization(std::vector> wait_objects, - bool wait_set_output, bool wait_all); + bool wait_set_output); /** * Waits the current thread from an ArbitrateAddress call diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp index eac181f4e..b50cf520d 100644 --- a/src/core/hle/kernel/timer.cpp +++ 
b/src/core/hle/kernel/timer.cpp @@ -60,14 +60,10 @@ void Timer::Set(s64 initial, s64 interval) { u64 initial_microseconds = initial / 1000; CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type, callback_handle); - - HLE::Reschedule(__func__); } void Timer::Cancel() { CoreTiming::UnscheduleEvent(timer_callback_event_type, callback_handle); - - HLE::Reschedule(__func__); } void Timer::Clear() { diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index c6b80dc50..061692af8 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -249,27 +249,30 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { auto object = Kernel::g_handle_table.GetWaitObject(handle); Kernel::Thread* thread = Kernel::GetCurrentThread(); - thread->waitsynch_waited = false; - if (object == nullptr) return ERR_INVALID_HANDLE; LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle, object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds); - HLE::Reschedule(__func__); - - // Check for next thread to schedule if (object->ShouldWait()) { + if (nano_seconds == 0) + return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); + object->AddWaitingThread(thread); - Kernel::WaitCurrentThread_WaitSynchronization({object}, false, false); + thread->status = THREADSTATUS_WAIT_SYNCH; // Create an event to wake the thread up after the specified nanosecond delay has passed thread->WakeAfterDelay(nano_seconds); - // NOTE: output of this SVC will be set later depending on how the thread resumes - return HLE::RESULT_INVALID; + // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects. + // Otherwise we retain the default value of timeout. 
+ return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); } object->Acquire(); @@ -283,8 +286,6 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou bool wait_thread = !wait_all; int handle_index = 0; Kernel::Thread* thread = Kernel::GetCurrentThread(); - bool was_waiting = thread->waitsynch_waited; - thread->waitsynch_waited = false; // Check if 'handles' is invalid if (handles == nullptr) @@ -300,90 +301,113 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou return ResultCode(ErrorDescription::OutOfRange, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); - // If 'handle_count' is non-zero, iterate through each handle and wait the current thread if - // necessary - if (handle_count != 0) { - bool selected = false; // True once an object has been selected - - Kernel::SharedPtr wait_object; - - for (int i = 0; i < handle_count; ++i) { - auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); - if (object == nullptr) - return ERR_INVALID_HANDLE; - - // Check if the current thread should wait on this object... - if (object->ShouldWait()) { - - // Check we are waiting on all objects... - if (wait_all) - // Wait the thread - wait_thread = true; - } else { - // Do not wait on this object, check if this object should be selected... 
- if (!wait_all && (!selected || (wait_object == object && was_waiting))) { - // Do not wait the thread - wait_thread = false; - handle_index = i; - wait_object = object; - selected = true; - } - } - } - } else { - // If no handles were passed in, put the thread to sleep only when 'wait_all' is false - // NOTE: This should deadlock the current thread if no timeout was specified - if (!wait_all) { - wait_thread = true; - } + using ObjectPtr = Kernel::SharedPtr; + + std::vector objects(handle_count); + + for (int i = 0; i < handle_count; ++i) { + auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); + if (object == nullptr) + return ERR_INVALID_HANDLE; + objects[i] = object; } - SCOPE_EXIT({ - HLE::Reschedule("WaitSynchronizationN"); - }); // Reschedule after putting the threads to sleep. + // Clear the mapping of wait object indices + thread->wait_objects_index.clear(); + + if (!wait_all) { + // Find the first object that is acquireable in the provided list of objects + auto itr = std::find_if(objects.begin(), objects.end(), [](const ObjectPtr& object) { + return !object->ShouldWait(); + }); + + if (itr != objects.end()) { + // We found a ready object, acquire it and set the result value + ObjectPtr object = *itr; + object->Acquire(); + *out = std::distance(objects.begin(), itr); + return RESULT_SUCCESS; + } + + // No objects were ready to be acquired, prepare to suspend the thread. + + // If a timeout value of 0 was provided, just return the Timeout error code instead of suspending the thread. + if (nano_seconds == 0) { + return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); + } - // If thread should wait, then set its state to waiting - if (wait_thread) { + // Put the thread to sleep + thread->status = THREADSTATUS_WAIT_SYNCH; - // Actually wait the current thread on each object if we decided to wait... 
- std::vector> wait_objects; - wait_objects.reserve(handle_count); + // Clear the thread's waitlist, we won't use it for wait_all = false + thread->wait_objects.clear(); - for (int i = 0; i < handle_count; ++i) { - auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); - object->AddWaitingThread(Kernel::GetCurrentThread()); - wait_objects.push_back(object); + // Add the thread to each of the objects' waiting threads. + for (int i = 0; i < objects.size(); ++i) { + ObjectPtr object = objects[i]; + // Set the index of this object in the mapping of Objects -> index for this thread. + thread->wait_objects_index[object->GetObjectId()] = i; + object->AddWaitingThread(thread); + // TODO(Subv): Perform things like update the mutex lock owner's priority to prevent priority inversion. } - Kernel::WaitCurrentThread_WaitSynchronization(std::move(wait_objects), true, wait_all); + // Note: If no handles and no timeout were given, then the thread will deadlock, this is consistent with hardware behavior. // Create an event to wake the thread up after the specified nanosecond delay has passed - Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); + thread->WakeAfterDelay(nano_seconds); - // NOTE: output of this SVC will be set later depending on how the thread resumes - return HLE::RESULT_INVALID; - } + // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects. + // Otherwise we retain the default value of timeout, and -1 in the out parameter + thread->wait_set_output = true; + *out = -1; + return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); + } else { + bool all_available = std::all_of(objects.begin(), objects.end(), [](const ObjectPtr& object) { + return !object->ShouldWait(); + }); + if (all_available) { + // We can acquire all objects right now, do so. 
+ for (auto object : objects) + object->Acquire(); + // Note: In this case, the `out` parameter is not set, and retains whatever value it had before. + return RESULT_SUCCESS; + } - // Acquire objects if we did not wait... - for (int i = 0; i < handle_count; ++i) { - auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); + // Not all objects were available right now, prepare to suspend the thread. - // Acquire the object if it is not waiting... - if (!object->ShouldWait()) { - object->Acquire(); + // If a timeout value of 0 was provided, just return the Timeout error code instead of suspending the thread. + if (nano_seconds == 0) { + return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); + } + + // Put the thread to sleep + thread->status = THREADSTATUS_WAIT_SYNCH; - // If this was the first non-waiting object and 'wait_all' is false, don't acquire - // any other objects - if (!wait_all) - break; + // Set the thread's waitlist to the list of objects passed to WaitSynchronizationN + thread->wait_objects = objects; + + // Add the thread to each of the objects' waiting threads. + for (auto object : objects) { + object->AddWaitingThread(thread); + // TODO(Subv): Perform things like update the mutex lock owner's priority to prevent priority inversion. } - } - // TODO(bunnei): If 'wait_all' is true, this is probably wrong. However, real hardware does - // not seem to set it to any meaningful value. - *out = handle_count != 0 ? (wait_all ? -1 : handle_index) : 0; + // Create an event to wake the thread up after the specified nanosecond delay has passed + thread->WakeAfterDelay(nano_seconds); - return RESULT_SUCCESS; + // This value gets set to -1 by default in this case, it is not modified after this. + *out = -1; + // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a signal in one of its wait objects. 
+ return ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, + ErrorLevel::Info); + } } /// Create an address arbiter (to allocate access to shared resources) @@ -1148,6 +1172,7 @@ void CallSVC(u32 immediate) { if (info) { if (info->func) { info->func(); + HLE::Reschedule(__func__); } else { LOG_ERROR(Kernel_SVC, "unimplemented SVC function %s(..)", info->name); } -- cgit v1.2.3