diff options
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/input.h | 2 | ||||
-rw-r--r-- | src/common/ring_buffer.h | 2 | ||||
-rw-r--r-- | src/common/scratch_buffer.h | 46 | ||||
-rw-r--r-- | src/common/settings.h | 10 | ||||
-rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
-rw-r--r-- | src/common/x64/cpu_wait.cpp | 52 |
8 files changed, 74 insertions, 41 deletions
diff --git a/src/common/input.h b/src/common/input.h index ea30770ae..2c4ccea22 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -75,8 +75,10 @@ enum class DriverResult { ErrorWritingData, NoDeviceDetected, InvalidHandle, + InvalidParameters, NotSupported, Disabled, + Delayed, Unknown, }; diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 416680d44..5c961b202 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -54,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::span<T> input) { + std::size_t Push(std::span<const T> input) { return Push(input.data(), input.size()); } diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 6fe907953..d5961b020 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -5,7 +5,6 @@ #include <iterator> -#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -19,15 +18,22 @@ namespace Common { template <typename T> class ScratchBuffer { public: - using iterator = T*; - using const_iterator = const T*; - using value_type = T; using element_type = T; - using iterator_category = std::contiguous_iterator_tag; + using value_type = T; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using iterator = pointer; + using const_iterator = const_pointer; + using iterator_category = std::random_access_iterator_tag; + using iterator_concept = std::contiguous_iterator_tag; ScratchBuffer() = default; - explicit ScratchBuffer(size_t initial_capacity) + explicit ScratchBuffer(size_type initial_capacity) : last_requested_size{initial_capacity}, buffer_capacity{initial_capacity}, buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {} @@ -39,7 +45,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will remain intact. - void resize(size_t size) { + void resize(size_type size) { if (size > buffer_capacity) { auto new_buffer = Common::make_unique_for_overwrite<T[]>(size); std::move(buffer.get(), buffer.get() + buffer_capacity, new_buffer.get()); @@ -51,7 +57,7 @@ public: /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will be destroyed if a reallocation occurs. - void resize_destructive(size_t size) { + void resize_destructive(size_type size) { if (size > buffer_capacity) { buffer_capacity = size; buffer = Common::make_unique_for_overwrite<T[]>(buffer_capacity); @@ -59,43 +65,43 @@ public: last_requested_size = size; } - [[nodiscard]] T* data() noexcept { + [[nodiscard]] pointer data() noexcept { return buffer.get(); } - [[nodiscard]] const T* data() const noexcept { + [[nodiscard]] const_pointer data() const noexcept { return buffer.get(); } - [[nodiscard]] T* begin() noexcept { + [[nodiscard]] iterator begin() noexcept { return data(); } - [[nodiscard]] const T* begin() const noexcept { + [[nodiscard]] const_iterator begin() const noexcept { return data(); } - [[nodiscard]] T* end() noexcept { + [[nodiscard]] iterator end() noexcept { return data() + last_requested_size; } - [[nodiscard]] const T* end() const noexcept { + [[nodiscard]] const_iterator end() const noexcept { return data() + last_requested_size; } - [[nodiscard]] T& operator[](size_t i) { + [[nodiscard]] reference operator[](size_type i) { return buffer[i]; } - [[nodiscard]] const T& operator[](size_t i) const { + [[nodiscard]] const_reference operator[](size_type i) const { return buffer[i]; } - [[nodiscard]] size_t size() const noexcept { + [[nodiscard]] size_type size() const noexcept { return last_requested_size; } - [[nodiscard]] size_t capacity() const noexcept { + [[nodiscard]] size_type capacity() const noexcept { return buffer_capacity; } @@ -106,8 +112,8 @@ public: } private: - size_t last_requested_size{}; - size_t buffer_capacity{}; + size_type last_requested_size{}; + size_type buffer_capacity{}; std::unique_ptr<T[]> buffer{}; }; diff --git a/src/common/settings.h b/src/common/settings.h index ae5ed93d8..59e96e74f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -527,12 +527,10 @@ struct Values { Setting<bool> mouse_panning{false, "mouse_panning"}; Setting<u8, true> mouse_panning_x_sensitivity{50, 1, 100, "mouse_panning_x_sensitivity"}; Setting<u8, true> mouse_panning_y_sensitivity{50, 1, 100, "mouse_panning_y_sensitivity"}; - Setting<u8, true> mouse_panning_deadzone_x_counterweight{ - 0, 0, 100, "mouse_panning_deadzone_x_counterweight"}; - Setting<u8, true> mouse_panning_deadzone_y_counterweight{ - 0, 0, 100, "mouse_panning_deadzone_y_counterweight"}; - Setting<u8, true> mouse_panning_decay_strength{22, 0, 100, "mouse_panning_decay_strength"}; - Setting<u8, true> mouse_panning_min_decay{5, 0, 100, "mouse_panning_min_decay"}; + Setting<u8, true> mouse_panning_deadzone_counterweight{20, 0, 100, + "mouse_panning_deadzone_counterweight"}; + Setting<u8, true> mouse_panning_decay_strength{18, 0, 100, "mouse_panning_decay_strength"}; + Setting<u8, true> mouse_panning_min_decay{6, 0, 100, "mouse_panning_min_decay"}; Setting<bool> mouse_enabled{false, "mouse_enabled"}; Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 91352912d..929ed67e4 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { add_field("CPU_Extension_x64_GFNI", caps.gfni); add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); + add_field("CPU_Extension_x64_MONITORX", caps.monitorx); add_field("CPU_Extension_x64_MOVBE", caps.movbe); add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); add_field("CPU_Extension_x64_POPCNT", caps.popcnt); diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c998b1197..780120a5b 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -168,6 +168,7 @@ static CPUCaps Detect() { __cpuid(cpu_id, 0x80000001); caps.lzcnt = Common::Bit<5>(cpu_id[2]); caps.fma4 = Common::Bit<16>(cpu_id[2]); + caps.monitorx = Common::Bit<29>(cpu_id[2]); } if (max_ex_fn >= 0x80000007) { diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 8253944d6..756459417 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -63,6 +63,7 @@ struct CPUCaps { bool gfni : 1; bool invariant_tsc : 1; bool lzcnt : 1; + bool monitorx : 1; bool movbe : 1; bool pclmulqdq : 1; bool popcnt : 1; diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index c53dd4945..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -13,36 +13,60 @@ namespace Common::X64 { +namespace { + +// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. +// For reference: +// At 1 GHz, 100K cycles is 100us +// At 2 GHz, 100K cycles is 50us +// At 4 GHz, 100K cycles is 25us +constexpr auto PauseCycles = 100'000U; + +} // Anonymous namespace + #ifdef _MSC_VER __forceinline static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; - _tpause(0, FencedRDTSC() + PauseCycles); + static constexpr auto RequestC02State = 0U; + _tpause(RequestC02State, FencedRDTSC() + PauseCycles); +} + +__forceinline static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + _mm_monitorx(&monitor_var, 0, 0); + _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); } #else static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; + static constexpr auto RequestC02State = 0U; const auto tsc = FencedRDTSC() + PauseCycles; const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); const auto edx = static_cast<u32>(tsc >> 32); - asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); + asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); +} + +static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); + asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); } #endif void MicroSleep() { static const bool has_waitpkg = GetCPUCaps().waitpkg; + static const bool has_monitorx = GetCPUCaps().monitorx; if (has_waitpkg) { TPAUSE(); + } else if (has_monitorx) { + MWAITX(); } else { std::this_thread::yield(); } |