From bd09c825218aac9255643b9aa7c75f5ba156038c Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:17:50 -0500 Subject: common: Implement a high resolution steady clock This implementation provides a consistent, high performance, and high resolution clock where/when std::chrono::steady_clock does not provide sufficient precision. --- src/common/CMakeLists.txt | 2 ++ src/common/steady_clock.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++ src/common/steady_clock.h | 23 +++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 src/common/steady_clock.cpp create mode 100644 src/common/steady_clock.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 56b247ac4..9f5d4c265 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -113,6 +113,8 @@ add_library(common STATIC socket_types.h spin_lock.cpp spin_lock.h + steady_clock.cpp + steady_clock.h stream.cpp stream.h string_util.cpp diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp new file mode 100644 index 000000000..0d5908aa7 --- /dev/null +++ b/src/common/steady_clock.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#if defined(_WIN32) +#include +#else +#include +#endif + +#include "common/steady_clock.h" + +namespace Common { + +#ifdef _WIN32 +static s64 WindowsQueryPerformanceFrequency() { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return frequency.QuadPart; +} + +static s64 WindowsQueryPerformanceCounter() { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + return counter.QuadPart; +} +#endif + +SteadyClock::time_point SteadyClock::Now() noexcept { +#if defined(_WIN32) + static const auto freq = WindowsQueryPerformanceFrequency(); + const auto counter = WindowsQueryPerformanceCounter(); + + // 10 MHz is a very common QPC frequency on modern PCs. + // Optimizing for this specific frequency can double the performance of + // this function by avoiding the expensive frequency conversion path. + static constexpr s64 TenMHz = 10'000'000; + + if (freq == TenMHz) [[likely]] { + static_assert(period::den % TenMHz == 0); + static constexpr s64 Multiplier = period::den / TenMHz; + return time_point{duration{counter * Multiplier}}; + } + + const auto whole = (counter / freq) * period::den; + const auto part = (counter % freq) * period::den / freq; + return time_point{duration{whole + part}}; +#elif defined(__APPLE__) + return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}}; +#else + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}}; +#endif +} + +}; // namespace Common diff --git a/src/common/steady_clock.h b/src/common/steady_clock.h new file mode 100644 index 000000000..9497cf865 --- /dev/null +++ b/src/common/steady_clock.h @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Common { + +struct SteadyClock { + using rep = s64; + using period = std::nano; + using duration = std::chrono::nanoseconds; + using time_point = std::chrono::time_point; + + static constexpr bool is_steady = true; + + [[nodiscard]] static time_point Now() noexcept; +}; + +} // namespace Common -- cgit v1.2.3 From 1ed49f92dd56289e6e31a967e602c65ccedd4ff1 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:27:10 -0500 Subject: common: Implement a method to change the Windows timer resolution This utilizes undocumented NtDll functions to change the current timer resolution from the default of 1ms. --- src/common/CMakeLists.txt | 8 +++ src/common/windows/timer_resolution.cpp | 87 +++++++++++++++++++++++++++++++++ src/common/windows/timer_resolution.h | 38 ++++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 src/common/windows/timer_resolution.cpp create mode 100644 src/common/windows/timer_resolution.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9f5d4c265..58ff5f2f3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -144,6 +144,14 @@ add_library(common STATIC zstd_compression.h ) +if (WIN32) + target_sources(common PRIVATE + windows/timer_resolution.cpp + windows/timer_resolution.h + ) + target_link_libraries(common PRIVATE ntdll) +endif() + if(ARCHITECTURE_x86_64) target_sources(common PRIVATE diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp new file mode 100644 index 000000000..6c2063a4c --- /dev/null +++ b/src/common/windows/timer_resolution.cpp @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/windows/timer_resolution.h" + +extern "C" { +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html +NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html +NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution, + PULONG CurrentResolution); + +// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html +NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); +} + +namespace Common::Windows { + +namespace { + +using namespace std::chrono; + +constexpr nanoseconds ToNS(ULONG hundred_ns) { + return nanoseconds{hundred_ns * 100}; +} + +constexpr ULONG ToHundredNS(nanoseconds ns) { + return static_cast(ns.count()) / 100; +} + +struct TimerResolution { + std::chrono::nanoseconds minimum; + std::chrono::nanoseconds maximum; + std::chrono::nanoseconds current; +}; + +TimerResolution GetTimerResolution() { + ULONG MinimumTimerResolution; + ULONG MaximumTimerResolution; + ULONG CurrentTimerResolution; + NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution, + &CurrentTimerResolution); + return { + .minimum{ToNS(MinimumTimerResolution)}, + .maximum{ToNS(MaximumTimerResolution)}, + .current{ToNS(CurrentTimerResolution)}, + }; +} + +} // Anonymous namespace + +nanoseconds GetMinimumTimerResolution() { + return GetTimerResolution().minimum; +} + +nanoseconds GetMaximumTimerResolution() { + return GetTimerResolution().maximum; +} + +nanoseconds GetCurrentTimerResolution() { + return GetTimerResolution().current; +} + +nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { + // Set the timer resolution, and return the current timer resolution. + const auto DesiredTimerResolution = ToHundredNS(timer_resolution); + ULONG CurrentTimerResolution; + NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution); + return ToNS(CurrentTimerResolution); +} + +nanoseconds SetCurrentTimerResolutionToMaximum() { + return SetCurrentTimerResolution(GetMaximumTimerResolution()); +} + +void SleepForOneTick() { + LARGE_INTEGER DelayInterval{ + .QuadPart{-1}, + }; + NtDelayExecution(FALSE, &DelayInterval); +} + +} // namespace Common::Windows diff --git a/src/common/windows/timer_resolution.h b/src/common/windows/timer_resolution.h new file mode 100644 index 000000000..e1e50a62d --- /dev/null +++ b/src/common/windows/timer_resolution.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common::Windows { + +/// Returns the minimum (least precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMinimumTimerResolution(); + +/// Returns the maximum (most precise) supported timer resolution in nanoseconds. +std::chrono::nanoseconds GetMaximumTimerResolution(); + +/// Returns the current timer resolution in nanoseconds. +std::chrono::nanoseconds GetCurrentTimerResolution(); + +/** + * Sets the current timer resolution. + * + * @param timer_resolution Timer resolution in nanoseconds. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution); + +/** + * Sets the current timer resolution to the maximum supported timer resolution. + * + * @returns The current timer resolution. + */ +std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum(); + +/// Sleep for one tick of the current timer resolution. +void SleepForOneTick(); + +} // namespace Common::Windows -- cgit v1.2.3 From 7fffdf83b70ec5f0ead804cb2c16b9029f7e0cfa Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 19:43:00 -0500 Subject: wall_clock: Make use of SteadyClock --- src/common/wall_clock.cpp | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'src/common') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ae07f2811..6d972d136 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/wall_clock.h" @@ -11,45 +12,32 @@ namespace Common { -using base_timer = std::chrono::steady_clock; -using base_time_point = std::chrono::time_point; - class StandardWallClock final : public WallClock { public: explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { - start_time = base_timer::now(); - } + : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, + start_time{SteadyClock::Now()} {} std::chrono::nanoseconds GetTimeNS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return SteadyClock::Now() - start_time; } std::chrono::microseconds GetTimeUS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } std::chrono::milliseconds GetTimeMS() override { - base_time_point current = base_timer::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return std::chrono::duration_cast(GetTimeNS()); } u64 GetClockCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = - Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } u64 GetCPUCycles() override { - std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); - return Common::Divide128On32(temporary, 1000000000).first; + const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); + return Common::Divide128On32(temp, NS_RATIO).first; } void Pause([[maybe_unused]] bool is_paused) override { @@ -57,7 +45,7 @@ public: } private: - base_time_point start_time; + SteadyClock::time_point start_time; }; #ifdef ARCHITECTURE_x86_64 -- cgit v1.2.3 From bff14532825e7517882ca913738347059f73cf7f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 21:06:19 -0500 Subject: core_timing: Use higher precision sleeps on Windows The precision of sleep_for and wait_for is limited to 1-1.5ms on Windows. Using SleepForOneTick() allows us to sleep for exactly one interval of the current timer resolution. This allows us to take advantage of systems that have a timer resolution of 0.5ms to reduce CPU overhead in the event loop. --- src/common/wall_clock.cpp | 5 +++++ src/common/wall_clock.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'src/common') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 6d972d136..817e71d52 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -81,4 +81,9 @@ std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, #endif +std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +} + } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 828a523a8..157ec5eae 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -55,4 +55,7 @@ private: [[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency); +[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency); + } // namespace Common -- cgit v1.2.3 From 026eaddbeef4d0a4992fb19b40af9242f09c3c6a Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 3 Mar 2023 20:54:15 -0500 Subject: timer_resolution: Set current process to High QoS Ensures that this process is treated as a high performance process by the Windows scheduler. --- src/common/windows/timer_resolution.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/common') diff --git a/src/common/windows/timer_resolution.cpp b/src/common/windows/timer_resolution.cpp index 6c2063a4c..29c6e5c7e 100644 --- a/src/common/windows/timer_resolution.cpp +++ b/src/common/windows/timer_resolution.cpp @@ -18,6 +18,15 @@ NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetRes NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval); } +// Defines for compatibility with older Windows 10 SDKs. + +#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED +#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1 +#endif +#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION +#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4 +#endif + namespace Common::Windows { namespace { @@ -51,6 +60,18 @@ TimerResolution GetTimerResolution() { }; } +void SetHighQoS() { + // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service + PROCESS_POWER_THROTTLING_STATE PowerThrottling{ + .Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION}, + .ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED | + PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION}, + .StateMask{}, + }; + SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling, + sizeof(PROCESS_POWER_THROTTLING_STATE)); +} + } // Anonymous namespace nanoseconds GetMinimumTimerResolution() { @@ -74,6 +95,7 @@ nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) { } nanoseconds SetCurrentTimerResolutionToMaximum() { + SetHighQoS(); return SetCurrentTimerResolution(GetMaximumTimerResolution()); } -- cgit v1.2.3 From 376a414f5bc6d27e5e0fbda323caf5520436bf39 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 3 Mar 2023 21:02:24 -0500 Subject: native_clock: Round RDTSC frequency to the nearest 1000 --- src/common/x64/native_clock.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 8b08332ab..bc1a973b0 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -6,6 +6,7 @@ #include #include "common/atomic_ops.h" +#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" @@ -39,6 +40,12 @@ static u64 FencedRDTSC() { } #endif +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + u64 EstimateRDTSCFrequency() { // Discard the first result measuring the rdtsc. FencedRDTSC(); @@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() { FencedRDTSC(); // Get the current time. - const auto start_time = std::chrono::steady_clock::now(); + const auto start_time = Common::SteadyClock::Now(); const u64 tsc_start = FencedRDTSC(); - // Wait for 200 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{200}); - const auto end_time = std::chrono::steady_clock::now(); + // Wait for 250 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{250}); + const auto end_time = Common::SteadyClock::Now(); const u64 tsc_end = FencedRDTSC(); // Calculate differences. const u64 timer_diff = static_cast( std::chrono::duration_cast(end_time - start_time).count()); const u64 tsc_diff = tsc_end - tsc_start; const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return tsc_freq; + return RoundToNearest<1000>(tsc_freq); } namespace X64 { -- cgit v1.2.3