// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include #include #include #include #include #ifdef _MSC_VER #include #pragma intrinsic(__umulh) #pragma intrinsic(_udiv128) #else #include #endif #include "common/atomic_ops.h" #include "common/uint128.h" #include "common/x64/native_clock.h" namespace { [[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { #ifdef __SIZEOF_INT128__ const auto base = static_cast(numerator) << 64ULL; return static_cast(base / divisor); #elif defined(_M_X64) || defined(_M_ARM64) std::array r = {0, numerator}; u64 remainder; #if _MSC_VER < 1923 return udiv128(r[1], r[0], divisor, &remainder); #else return _udiv128(r[1], r[0], divisor, &remainder); #endif #else // This one is bit more inaccurate. return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); #endif } [[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { #ifdef __SIZEOF_INT128__ return (static_cast(a) * static_cast(b)) >> 64; #elif defined(_M_X64) || defined(_M_ARM64) return __umulh(a, b); // MSVC #else // Generic fallback const u64 a_lo = u32(a); const u64 a_hi = a >> 32; const u64 b_lo = u32(b); const u64 b_hi = b >> 32; const u64 a_x_b_hi = a_hi * b_hi; const u64 a_x_b_mid = a_hi * b_lo; const u64 b_x_a_mid = b_hi * a_lo; const u64 a_x_b_lo = a_lo * b_lo; const u64 carry_bit = (static_cast(static_cast(a_x_b_mid)) + static_cast(static_cast(b_x_a_mid)) + (a_x_b_lo >> 32)) >> 32; const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; return multhi; #endif } } // namespace namespace Common { u64 EstimateRDTSCFrequency() { const auto milli_10 = std::chrono::milliseconds{10}; // get current time _mm_mfence(); const u64 tscStart = __rdtsc(); const auto startTime = std::chrono::high_resolution_clock::now(); // wait roughly 3 seconds while (true) { auto milli = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - startTime); if (milli.count() >= 3000) break; std::this_thread::sleep_for(milli_10); } const auto endTime = std::chrono::high_resolution_clock::now(); _mm_mfence(); const u64 tscEnd = __rdtsc(); // calculate difference const u64 timer_diff = std::chrono::duration_cast(endTime - startTime).count(); const u64 tsc_diff = tscEnd - tscStart; const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); return tsc_freq; } namespace X64 { NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, u64 rtsc_frequency_) : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { _mm_mfence(); time_point.inner.last_measure = __rdtsc(); time_point.inner.accumulated_ticks = 0U; ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); } u64 NativeClock::GetRTSC() { TimePoint new_time_point{}; TimePoint current_time_point{}; do { current_time_point.pack = time_point.pack; _mm_mfence(); const u64 current_measure = __rdtsc(); u64 diff = current_measure - current_time_point.inner.last_measure; diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure ? current_measure : current_time_point.inner.last_measure; new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { if (!is_paused) { TimePoint current_time_point{}; TimePoint new_time_point{}; do { current_time_point.pack = time_point.pack; new_time_point.pack = current_time_point.pack; _mm_mfence(); new_time_point.inner.last_measure = __rdtsc(); } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, current_time_point.pack)); } } std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; } std::chrono::microseconds NativeClock::GetTimeUS() { const u64 rtsc_value = GetRTSC(); return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; } std::chrono::milliseconds NativeClock::GetTimeMS() { const u64 rtsc_value = GetRTSC(); return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; } u64 NativeClock::GetClockCycles() { const u64 rtsc_value = GetRTSC(); return MultiplyHigh(rtsc_value, clock_rtsc_factor); } u64 NativeClock::GetCPUCycles() { const u64 rtsc_value = GetRTSC(); return MultiplyHigh(rtsc_value, cpu_rtsc_factor); } } // namespace X64 } // namespace Common