diff options
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/atomic_ops.h | 90 | ||||
-rw-r--r-- | src/common/logging/filter.cpp | 1 | ||||
-rw-r--r-- | src/common/logging/types.h | 1 | ||||
-rw-r--r-- | src/common/settings.h | 3 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 59 |
5 files changed, 135 insertions, 19 deletions
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h index b94d73c7a..69fde8421 100644 --- a/src/common/atomic_ops.h +++ b/src/common/atomic_ops.h @@ -46,6 +46,50 @@ namespace Common { reinterpret_cast<__int64*>(expected.data())) != 0; } +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected, + u8& actual) { + actual = + _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected, + u16& actual) { + actual = + _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected, + u32& actual) { + actual = + _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected, + u64& actual) { + actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value, + expected); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected, + u128& actual) { + const bool result = + _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1], + value[0], reinterpret_cast<__int64*>(expected.data())) != 0; + actual = expected; + return result; +} + +[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { + u128 result{}; + _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1], + result[0], reinterpret_cast<__int64*>(result.data())); + return result; +} + #else [[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { @@ -72,6 +116,52 @@ namespace Common { return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); } +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected, + u8& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected, + u16& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected, + u32& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected, + u64& actual) { + actual = __sync_val_compare_and_swap(pointer, expected, value); + return actual == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected, + u128& actual) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + unsigned __int128 actual_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); + std::memcpy(actual.data(), &actual_a, sizeof(u128)); + return actual_a == expected_a; +} + +[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) { + unsigned __int128 zeros_a = 0; + unsigned __int128 result_a = + __sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a); + + u128 result; + std::memcpy(result.data(), &result_a, sizeof(u128)); + return result; +} + #endif } // namespace Common diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 9120cc178..4acbff649 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -101,6 +101,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Service, GRC) \ SUB(Service, HID) \ SUB(Service, IRS) \ + SUB(Service, JIT) \ SUB(Service, LBL) \ SUB(Service, LDN) \ SUB(Service, LDR) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index f803ab796..99c15fa96 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -69,6 +69,7 @@ enum class Class : u8 { Service_GRC, ///< The game recording service Service_HID, ///< The HID (Human interface device) service Service_IRS, ///< The IRS service + Service_JIT, ///< The JIT service Service_LBL, ///< The LBL (LCD backlight) service Service_LDN, ///< The LDN (Local domain network) service Service_LDR, ///< The loader service diff --git a/src/common/settings.h b/src/common/settings.h index a37d83fb3..86e0fa140 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -38,6 +38,7 @@ enum class CPUAccuracy : u32 { Auto = 0, Accurate = 1, Unsafe = 2, + Paranoid = 3, }; enum class FullscreenMode : u32 { @@ -470,7 +471,7 @@ struct Values { // Cpu RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, - CPUAccuracy::Unsafe, "cpu_accuracy"}; + CPUAccuracy::Paranoid, "cpu_accuracy"}; // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"}; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 347e41efc..7fd9d22f8 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -10,25 +10,49 @@ #include "common/uint128.h" #include "common/x64/native_clock.h" +#ifdef _MSC_VER +#include <intrin.h> +#endif + namespace Common { +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static u64 FencedRDTSC() { + u64 result; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "shl $32, %%rdx\n\t" + "or %%rdx, %0\n\t" + "lfence" + : "=a"(result) + : + : "rdx", "memory", "cc"); + return result; +} +#endif + u64 EstimateRDTSCFrequency() { // Discard the first result measuring the rdtsc. - _mm_mfence(); - __rdtsc(); + FencedRDTSC(); std::this_thread::sleep_for(std::chrono::milliseconds{1}); - _mm_mfence(); - __rdtsc(); + FencedRDTSC(); // Get the current time. const auto start_time = std::chrono::steady_clock::now(); - _mm_mfence(); - const u64 tsc_start = __rdtsc(); + const u64 tsc_start = FencedRDTSC(); // Wait for 200 milliseconds. std::this_thread::sleep_for(std::chrono::milliseconds{200}); const auto end_time = std::chrono::steady_clock::now(); - _mm_mfence(); - const u64 tsc_end = __rdtsc(); + const u64 tsc_end = FencedRDTSC(); // Calculate differences. const u64 timer_diff = static_cast<u64>( std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); @@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen u64 rtsc_frequency_) : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { - _mm_mfence(); - time_point.inner.last_measure = __rdtsc(); + time_point.inner.last_measure = FencedRDTSC(); time_point.inner.accumulated_ticks = 0U; ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); @@ -55,10 +78,10 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen u64 NativeClock::GetRTSC() { TimePoint new_time_point{}; TimePoint current_time_point{}; + + current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { - current_time_point.pack = time_point.pack; - _mm_mfence(); - const u64 current_measure = __rdtsc(); + const u64 current_measure = FencedRDTSC(); u64 diff = current_measure - current_time_point.inner.last_measure; diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure @@ -66,7 +89,7 @@ u64 NativeClock::GetRTSC() { : current_time_point.inner.last_measure; new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack)); + current_time_point.pack, current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } @@ -75,13 +98,13 @@ void NativeClock::Pause(bool is_paused) { if (!is_paused) { TimePoint current_time_point{}; TimePoint new_time_point{}; + + current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { - current_time_point.pack = time_point.pack; new_time_point.pack = current_time_point.pack; - _mm_mfence(); - new_time_point.inner.last_measure = __rdtsc(); + new_time_point.inner.last_measure = FencedRDTSC(); } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack)); + current_time_point.pack, current_time_point.pack)); } } |