Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/CMakeLists.txt | 4
-rw-r--r-- | src/common/bounded_threadsafe_queue.h | 319
-rw-r--r-- | src/common/container_hash.h | 92
-rw-r--r-- | src/common/intrusive_red_black_tree.h | 8
-rw-r--r-- | src/common/logging/backend.cpp | 16
-rw-r--r-- | src/common/range_map.h | 6
-rw-r--r-- | src/common/string_util.cpp | 14
-rw-r--r-- | src/common/string_util.h | 8
-rw-r--r-- | src/common/telemetry.cpp | 1
-rw-r--r-- | src/common/typed_address.h | 315
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 1
-rw-r--r-- | src/common/x64/cpu_detect.h | 1
-rw-r--r-- | src/common/x64/cpu_wait.cpp | 69
-rw-r--r-- | src/common/x64/cpu_wait.h | 10
-rw-r--r-- | src/common/x64/native_clock.cpp | 13
-rw-r--r-- | src/common/zstd_compression.cpp | 2
16 files changed, 726 insertions, 153 deletions
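
For orientation before the diff itself: the reworked bounded_threadsafe_queue.h replaces the old Push/Pop interface with TryEmplace/EmplaceWait and TryPop/PopWait, which is the pattern the logging backend below switches to. The following is a minimal standalone sketch of that producer/consumer usage; the Message type, main function, and loop counts are illustrative and not part of this change set.

    // Illustrative only: a producer/consumer pair on the new MPSCQueue API,
    // mirroring how Impl in logging/backend.cpp drives its message_queue.
    #include <stop_token>
    #include <string>
    #include <thread>

    #include "common/bounded_threadsafe_queue.h"

    struct Message {
        std::string text;
    };

    int main() {
        Common::MPSCQueue<Message> queue;

        // Consumer: PopWait blocks until an item arrives or stop is requested.
        std::jthread consumer([&queue](std::stop_token stop_token) {
            Message msg;
            while (!stop_token.stop_requested()) {
                queue.PopWait(msg, stop_token);
            }
            // Drain any remaining entries without blocking.
            while (queue.TryPop(msg)) {
            }
        });

        // Producer: EmplaceWait blocks only if the bounded buffer is full.
        for (int i = 0; i < 16; ++i) {
            queue.EmplaceWait(Message{"hello"});
        }
        return 0;
    }

The same calls appear in the backend.cpp hunk below (EmplaceWait on push, PopWait with a stop token in the worker thread, and TryPop while draining on shutdown).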
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 61ab68864..13ed68b3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -38,6 +38,7 @@ add_library(common STATIC common_precompiled_headers.h common_types.h concepts.h + container_hash.h demangle.cpp demangle.h div_ceil.h @@ -132,6 +133,7 @@ add_library(common STATIC time_zone.h tiny_mt.h tree.h + typed_address.h uint128.h unique_function.h uuid.cpp @@ -158,6 +160,8 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/cpu_wait.cpp + x64/cpu_wait.h x64/native_clock.cpp x64/native_clock.h x64/xbyak_abi.h diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h index 21217801e..bd87aa09b 100644 --- a/src/common/bounded_threadsafe_queue.h +++ b/src/common/bounded_threadsafe_queue.h @@ -1,158 +1,249 @@ -// SPDX-FileCopyrightText: Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se> -// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include <atomic> -#include <bit> #include <condition_variable> -#include <memory> +#include <cstddef> #include <mutex> #include <new> -#include <stop_token> -#include <type_traits> -#include <utility> + +#include "common/polyfill_thread.h" namespace Common { -#if defined(__cpp_lib_hardware_interference_size) -constexpr size_t hardware_interference_size = std::hardware_destructive_interference_size; -#else -constexpr size_t hardware_interference_size = 64; -#endif +namespace detail { +constexpr size_t DefaultCapacity = 0x1000; +} // namespace detail + +template <typename T, size_t Capacity = detail::DefaultCapacity> +class SPSCQueue { + static_assert((Capacity & (Capacity - 1)) == 0, "Capacity must be a power of two."); -template <typename T, size_t capacity = 0x400> -class MPSCQueue { public: - explicit MPSCQueue() : allocator{std::allocator<Slot<T>>()} { - // Allocate one extra slot to prevent false sharing on the last slot - slots = allocator.allocate(capacity + 1); - // Allocators are not required to honor alignment for over-aligned types - // (see http://eel.is/c++draft/allocator.requirements#10) so we verify - // alignment here - if (reinterpret_cast<uintptr_t>(slots) % alignof(Slot<T>) != 0) { - allocator.deallocate(slots, capacity + 1); - throw std::bad_alloc(); - } - for (size_t i = 0; i < capacity; ++i) { - std::construct_at(&slots[i]); - } - static_assert(std::has_single_bit(capacity), "capacity must be an integer power of 2"); - static_assert(alignof(Slot<T>) == hardware_interference_size, - "Slot must be aligned to cache line boundary to prevent false sharing"); - static_assert(sizeof(Slot<T>) % hardware_interference_size == 0, - "Slot size must be a multiple of cache line size to prevent " - "false sharing between adjacent slots"); - static_assert(sizeof(MPSCQueue) % hardware_interference_size == 0, - "Queue size must be a multiple of cache line size to " - "prevent false sharing between adjacent queues"); - } - - ~MPSCQueue() noexcept { - for (size_t i = 0; i < capacity; ++i) { - std::destroy_at(&slots[i]); - } - allocator.deallocate(slots, capacity + 1); + template <typename... Args> + bool TryEmplace(Args&&... args) { + return Emplace<PushMode::Try>(std::forward<Args>(args)...); } - // The queue must be both non-copyable and non-movable - MPSCQueue(const MPSCQueue&) = delete; - MPSCQueue& operator=(const MPSCQueue&) = delete; + template <typename... Args> + void EmplaceWait(Args&&... 
args) { + Emplace<PushMode::Wait>(std::forward<Args>(args)...); + } - MPSCQueue(MPSCQueue&&) = delete; - MPSCQueue& operator=(MPSCQueue&&) = delete; + bool TryPop(T& t) { + return Pop<PopMode::Try>(t); + } - void Push(const T& v) noexcept { - static_assert(std::is_nothrow_copy_constructible_v<T>, - "T must be nothrow copy constructible"); - emplace(v); + void PopWait(T& t) { + Pop<PopMode::Wait>(t); } - template <typename P, typename = std::enable_if_t<std::is_nothrow_constructible_v<T, P&&>>> - void Push(P&& v) noexcept { - emplace(std::forward<P>(v)); + void PopWait(T& t, std::stop_token stop_token) { + Pop<PopMode::WaitWithStopToken>(t, stop_token); } - void Pop(T& v, std::stop_token stop) noexcept { - auto const tail = tail_.fetch_add(1); - auto& slot = slots[idx(tail)]; - if (!slot.turn.test()) { - std::unique_lock lock{cv_mutex}; - cv.wait(lock, stop, [&slot] { return slot.turn.test(); }); - } - v = slot.move(); - slot.destroy(); - slot.turn.clear(); - slot.turn.notify_one(); + T PopWait() { + T t; + Pop<PopMode::Wait>(t); + return t; + } + + T PopWait(std::stop_token stop_token) { + T t; + Pop<PopMode::WaitWithStopToken>(t, stop_token); + return t; } private: - template <typename U = T> - struct Slot { - ~Slot() noexcept { - if (turn.test()) { - destroy(); + enum class PushMode { + Try, + Wait, + Count, + }; + + enum class PopMode { + Try, + Wait, + WaitWithStopToken, + Count, + }; + + template <PushMode Mode, typename... Args> + bool Emplace(Args&&... args) { + const size_t write_index = m_write_index.load(std::memory_order::relaxed); + + if constexpr (Mode == PushMode::Try) { + // Check if we have free slots to write to. + if ((write_index - m_read_index.load(std::memory_order::acquire)) == Capacity) { + return false; } + } else if constexpr (Mode == PushMode::Wait) { + // Wait until we have free slots to write to. + std::unique_lock lock{producer_cv_mutex}; + producer_cv.wait(lock, [this, write_index] { + return (write_index - m_read_index.load(std::memory_order::acquire)) < Capacity; + }); + } else { + static_assert(Mode < PushMode::Count, "Invalid PushMode."); } - template <typename... Args> - void construct(Args&&... args) noexcept { - static_assert(std::is_nothrow_constructible_v<U, Args&&...>, - "T must be nothrow constructible with Args&&..."); - std::construct_at(reinterpret_cast<U*>(&storage), std::forward<Args>(args)...); - } + // Determine the position to write to. + const size_t pos = write_index % Capacity; - void destroy() noexcept { - static_assert(std::is_nothrow_destructible_v<U>, "T must be nothrow destructible"); - std::destroy_at(reinterpret_cast<U*>(&storage)); - } + // Emplace into the queue. + std::construct_at(std::addressof(m_data[pos]), std::forward<Args>(args)...); + + // Increment the write index. + ++m_write_index; + + // Notify the consumer that we have pushed into the queue. + std::scoped_lock lock{consumer_cv_mutex}; + consumer_cv.notify_one(); + + return true; + } + + template <PopMode Mode> + bool Pop(T& t, [[maybe_unused]] std::stop_token stop_token = {}) { + const size_t read_index = m_read_index.load(std::memory_order::relaxed); - U&& move() noexcept { - return reinterpret_cast<U&&>(storage); + if constexpr (Mode == PopMode::Try) { + // Check if the queue is empty. + if (read_index == m_write_index.load(std::memory_order::acquire)) { + return false; + } + } else if constexpr (Mode == PopMode::Wait) { + // Wait until the queue is not empty. 
+ std::unique_lock lock{consumer_cv_mutex}; + consumer_cv.wait(lock, [this, read_index] { + return read_index != m_write_index.load(std::memory_order::acquire); + }); + } else if constexpr (Mode == PopMode::WaitWithStopToken) { + // Wait until the queue is not empty. + std::unique_lock lock{consumer_cv_mutex}; + Common::CondvarWait(consumer_cv, lock, stop_token, [this, read_index] { + return read_index != m_write_index.load(std::memory_order::acquire); + }); + if (stop_token.stop_requested()) { + return false; + } + } else { + static_assert(Mode < PopMode::Count, "Invalid PopMode."); } - // Align to avoid false sharing between adjacent slots - alignas(hardware_interference_size) std::atomic_flag turn{}; - struct aligned_store { - struct type { - alignas(U) unsigned char data[sizeof(U)]; - }; - }; - typename aligned_store::type storage; - }; + // Determine the position to read from. + const size_t pos = read_index % Capacity; + + // Pop the data off the queue, moving it. + t = std::move(m_data[pos]); + + // Increment the read index. + ++m_read_index; + + // Notify the producer that we have popped off the queue. + std::scoped_lock lock{producer_cv_mutex}; + producer_cv.notify_one(); + + return true; + } + alignas(128) std::atomic_size_t m_read_index{0}; + alignas(128) std::atomic_size_t m_write_index{0}; + + std::array<T, Capacity> m_data; + + std::condition_variable_any producer_cv; + std::mutex producer_cv_mutex; + std::condition_variable_any consumer_cv; + std::mutex consumer_cv_mutex; +}; + +template <typename T, size_t Capacity = detail::DefaultCapacity> +class MPSCQueue { +public: template <typename... Args> - void emplace(Args&&... args) noexcept { - static_assert(std::is_nothrow_constructible_v<T, Args&&...>, - "T must be nothrow constructible with Args&&..."); - auto const head = head_.fetch_add(1); - auto& slot = slots[idx(head)]; - slot.turn.wait(true); - slot.construct(std::forward<Args>(args)...); - slot.turn.test_and_set(); - cv.notify_one(); + bool TryEmplace(Args&&... args) { + std::scoped_lock lock{write_mutex}; + return spsc_queue.TryEmplace(std::forward<Args>(args)...); } - constexpr size_t idx(size_t i) const noexcept { - return i & mask; + template <typename... Args> + void EmplaceWait(Args&&... args) { + std::scoped_lock lock{write_mutex}; + spsc_queue.EmplaceWait(std::forward<Args>(args)...); } - static constexpr size_t mask = capacity - 1; + bool TryPop(T& t) { + return spsc_queue.TryPop(t); + } - // Align to avoid false sharing between head_ and tail_ - alignas(hardware_interference_size) std::atomic<size_t> head_{0}; - alignas(hardware_interference_size) std::atomic<size_t> tail_{0}; + void PopWait(T& t) { + spsc_queue.PopWait(t); + } - std::mutex cv_mutex; - std::condition_variable_any cv; + void PopWait(T& t, std::stop_token stop_token) { + spsc_queue.PopWait(t, stop_token); + } + + T PopWait() { + return spsc_queue.PopWait(); + } - Slot<T>* slots; - [[no_unique_address]] std::allocator<Slot<T>> allocator; + T PopWait(std::stop_token stop_token) { + return spsc_queue.PopWait(stop_token); + } - static_assert(std::is_nothrow_copy_assignable_v<T> || std::is_nothrow_move_assignable_v<T>, - "T must be nothrow copy or move assignable"); +private: + SPSCQueue<T, Capacity> spsc_queue; + std::mutex write_mutex; +}; - static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); +template <typename T, size_t Capacity = detail::DefaultCapacity> +class MPMCQueue { +public: + template <typename... Args> + bool TryEmplace(Args&&... 
args) { + std::scoped_lock lock{write_mutex}; + return spsc_queue.TryEmplace(std::forward<Args>(args)...); + } + + template <typename... Args> + void EmplaceWait(Args&&... args) { + std::scoped_lock lock{write_mutex}; + spsc_queue.EmplaceWait(std::forward<Args>(args)...); + } + + bool TryPop(T& t) { + std::scoped_lock lock{read_mutex}; + return spsc_queue.TryPop(t); + } + + void PopWait(T& t) { + std::scoped_lock lock{read_mutex}; + spsc_queue.PopWait(t); + } + + void PopWait(T& t, std::stop_token stop_token) { + std::scoped_lock lock{read_mutex}; + spsc_queue.PopWait(t, stop_token); + } + + T PopWait() { + std::scoped_lock lock{read_mutex}; + return spsc_queue.PopWait(); + } + + T PopWait(std::stop_token stop_token) { + std::scoped_lock lock{read_mutex}; + return spsc_queue.PopWait(stop_token); + } + +private: + SPSCQueue<T, Capacity> spsc_queue; + std::mutex write_mutex; + std::mutex read_mutex; }; } // namespace Common diff --git a/src/common/container_hash.h b/src/common/container_hash.h new file mode 100644 index 000000000..a5e357745 --- /dev/null +++ b/src/common/container_hash.h @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: 2005-2014 Daniel James +// SPDX-FileCopyrightText: 2016 Austin Appleby +// SPDX-License-Identifier: BSL-1.0 + +#include <array> +#include <climits> +#include <cstdint> +#include <limits> +#include <type_traits> +#include <vector> + +namespace Common { + +namespace detail { + +template <typename T> + requires std::is_unsigned_v<T> +inline std::size_t HashValue(T val) { + const unsigned int size_t_bits = std::numeric_limits<std::size_t>::digits; + const unsigned int length = + (std::numeric_limits<T>::digits - 1) / static_cast<unsigned int>(size_t_bits); + + std::size_t seed = 0; + + for (unsigned int i = length * size_t_bits; i > 0; i -= size_t_bits) { + seed ^= static_cast<size_t>(val >> i) + (seed << 6) + (seed >> 2); + } + + seed ^= static_cast<size_t>(val) + (seed << 6) + (seed >> 2); + + return seed; +} + +template <size_t Bits> +struct HashCombineImpl { + template <typename T> + static inline T fn(T seed, T value) { + seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } +}; + +template <> +struct HashCombineImpl<64> { + static inline std::uint64_t fn(std::uint64_t h, std::uint64_t k) { + const std::uint64_t m = (std::uint64_t(0xc6a4a793) << 32) + 0x5bd1e995; + const int r = 47; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + + // Completely arbitrary number, to prevent 0's + // from hashing to 0. 
+ h += 0xe6546b64; + + return h; + } +}; + +} // namespace detail + +template <typename T> +inline void HashCombine(std::size_t& seed, const T& v) { + seed = detail::HashCombineImpl<sizeof(std::size_t) * CHAR_BIT>::fn(seed, detail::HashValue(v)); +} + +template <typename It> +inline std::size_t HashRange(It first, It last) { + std::size_t seed = 0; + + for (; first != last; ++first) { + HashCombine<typename std::iterator_traits<It>::value_type>(seed, *first); + } + + return seed; +} + +template <typename T, size_t Size> +std::size_t HashValue(const std::array<T, Size>& v) { + return HashRange(v.cbegin(), v.cend()); +} + +template <typename T, typename Allocator> +std::size_t HashValue(const std::vector<T, Allocator>& v) { + return HashRange(v.cbegin(), v.cend()); +} + +} // namespace Common diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h index 5f6b34e82..bc2940fa0 100644 --- a/src/common/intrusive_red_black_tree.h +++ b/src/common/intrusive_red_black_tree.h @@ -96,10 +96,6 @@ public: return m_node == rhs.m_node; } - constexpr bool operator!=(const Iterator& rhs) const { - return !(*this == rhs); - } - constexpr pointer operator->() const { return m_node; } @@ -324,10 +320,6 @@ public: return m_impl == rhs.m_impl; } - constexpr bool operator!=(const Iterator& rhs) const { - return !(*this == rhs); - } - constexpr pointer operator->() const { return Traits::GetParent(std::addressof(*m_impl)); } diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 2a3bded40..f96c7c222 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -28,7 +28,7 @@ #ifdef _WIN32 #include "common/string_util.h" #endif -#include "common/threadsafe_queue.h" +#include "common/bounded_threadsafe_queue.h" namespace Common::Log { @@ -204,11 +204,11 @@ public: void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, std::string&& message) { - if (!filter.CheckMessage(log_class, log_level)) + if (!filter.CheckMessage(log_class, log_level)) { return; - const Entry& entry = - CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)); - message_queue.Push(entry); + } + message_queue.EmplaceWait( + CreateEntry(log_class, log_level, filename, line_num, function, std::move(message))); } private: @@ -225,7 +225,7 @@ private: ForEachBackend([&entry](Backend& backend) { backend.Write(entry); }); }; while (!stop_token.stop_requested()) { - entry = message_queue.PopWait(stop_token); + message_queue.PopWait(entry, stop_token); if (entry.filename != nullptr) { write_logs(); } @@ -233,7 +233,7 @@ private: // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a // case where a system is repeatedly spamming logs even on close. int max_logs_to_write = filter.IsDebug() ? 
INT_MAX : 100; - while (max_logs_to_write-- && message_queue.Pop(entry)) { + while (max_logs_to_write-- && message_queue.TryPop(entry)) { write_logs(); } }); @@ -273,7 +273,7 @@ private: ColorConsoleBackend color_console_backend{}; FileBackend file_backend; - MPSCQueue<Entry, true> message_queue{}; + MPSCQueue<Entry> message_queue{}; std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; std::jthread backend_thread; }; diff --git a/src/common/range_map.h b/src/common/range_map.h index 79c7ef547..ab73993e3 100644 --- a/src/common/range_map.h +++ b/src/common/range_map.h @@ -38,12 +38,12 @@ public: Map(address, address_end, null_value); } - [[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const { + [[nodiscard]] size_t GetContinuousSizeFrom(KeyTBase address) const { const KeyT new_address = static_cast<KeyT>(address); if (new_address < 0) { return 0; } - return ContinousSizeInternal(new_address); + return ContinuousSizeInternal(new_address); } [[nodiscard]] ValueT GetValueAt(KeyT address) const { @@ -59,7 +59,7 @@ private: using IteratorType = typename MapType::iterator; using ConstIteratorType = typename MapType::const_iterator; - size_t ContinousSizeInternal(KeyT address) const { + size_t ContinuousSizeInternal(KeyT address) const { const auto it = GetFirstElementBeforeOrOn(address); if (it == container.end() || it->second == null_value) { return 0; diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index e0b6180c5..feab1653d 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -125,18 +125,18 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st return result; } -std::string UTF16ToUTF8(const std::u16string& input) { +std::string UTF16ToUTF8(std::u16string_view input) { std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; - return convert.to_bytes(input); + return convert.to_bytes(input.data(), input.data() + input.size()); } -std::u16string UTF8ToUTF16(const std::string& input) { +std::u16string UTF8ToUTF16(std::string_view input) { std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; - return convert.from_bytes(input); + return convert.from_bytes(input.data(), input.data() + input.size()); } #ifdef _WIN32 -static std::wstring CPToUTF16(u32 code_page, const std::string& input) { +static std::wstring CPToUTF16(u32 code_page, std::string_view input) { const auto size = MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0); @@ -154,7 +154,7 @@ static std::wstring CPToUTF16(u32 code_page, const std::string& input) { return output; } -std::string UTF16ToUTF8(const std::wstring& input) { +std::string UTF16ToUTF8(std::wstring_view input) { const auto size = WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast<int>(input.size()), nullptr, 0, nullptr, nullptr); if (size == 0) { @@ -172,7 +172,7 @@ std::string UTF16ToUTF8(const std::wstring& input) { return output; } -std::wstring UTF8ToUTF16W(const std::string& input) { +std::wstring UTF8ToUTF16W(std::string_view input) { return CPToUTF16(CP_UTF8, input); } diff --git a/src/common/string_util.h b/src/common/string_util.h index f8aecc875..c351f1a0c 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -36,12 +36,12 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _ [[nodiscard]] std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest); -[[nodiscard]] std::string 
UTF16ToUTF8(const std::u16string& input); -[[nodiscard]] std::u16string UTF8ToUTF16(const std::string& input); +[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input); +[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input); #ifdef _WIN32 -[[nodiscard]] std::string UTF16ToUTF8(const std::wstring& input); -[[nodiscard]] std::wstring UTF8ToUTF16W(const std::string& str); +[[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input); +[[nodiscard]] std::wstring UTF8ToUTF16W(std::string_view str); #endif diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index d26394359..91352912d 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -97,6 +97,7 @@ void AppendCPUInfo(FieldCollection& fc) { add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); add_field("CPU_Extension_x64_POPCNT", caps.popcnt); add_field("CPU_Extension_x64_SHA", caps.sha); + add_field("CPU_Extension_x64_WAITPKG", caps.waitpkg); #else fc.AddField(FieldType::UserSystem, "CPU_Model", "Other"); #endif diff --git a/src/common/typed_address.h b/src/common/typed_address.h new file mode 100644 index 000000000..64f4a07c2 --- /dev/null +++ b/src/common/typed_address.h @@ -0,0 +1,315 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <compare> +#include <type_traits> +#include <fmt/format.h> + +#include "common/common_types.h" + +namespace Common { + +template <bool Virtual, typename T> +class TypedAddress { +public: + // Constructors. + constexpr inline TypedAddress() : m_address(0) {} + constexpr inline TypedAddress(uint64_t a) : m_address(a) {} + + template <typename U> + constexpr inline explicit TypedAddress(const U* ptr) + : m_address(reinterpret_cast<uint64_t>(ptr)) {} + + // Copy constructor. + constexpr inline TypedAddress(const TypedAddress& rhs) = default; + + // Assignment operator. + constexpr inline TypedAddress& operator=(const TypedAddress& rhs) = default; + + // Arithmetic operators. + template <typename I> + constexpr inline TypedAddress operator+(I rhs) const { + static_assert(std::is_integral_v<I>); + return m_address + rhs; + } + + constexpr inline TypedAddress operator+(TypedAddress rhs) const { + return m_address + rhs.m_address; + } + + constexpr inline TypedAddress operator++() { + return ++m_address; + } + + constexpr inline TypedAddress operator++(int) { + return m_address++; + } + + template <typename I> + constexpr inline TypedAddress operator-(I rhs) const { + static_assert(std::is_integral_v<I>); + return m_address - rhs; + } + + constexpr inline ptrdiff_t operator-(TypedAddress rhs) const { + return m_address - rhs.m_address; + } + + constexpr inline TypedAddress operator--() { + return --m_address; + } + + constexpr inline TypedAddress operator--(int) { + return m_address--; + } + + template <typename I> + constexpr inline TypedAddress operator+=(I rhs) { + static_assert(std::is_integral_v<I>); + m_address += rhs; + return *this; + } + + template <typename I> + constexpr inline TypedAddress operator-=(I rhs) { + static_assert(std::is_integral_v<I>); + m_address -= rhs; + return *this; + } + + // Logical operators. 
+ constexpr inline uint64_t operator&(uint64_t mask) const { + return m_address & mask; + } + + constexpr inline uint64_t operator|(uint64_t mask) const { + return m_address | mask; + } + + template <typename I> + constexpr inline TypedAddress operator|=(I rhs) { + static_assert(std::is_integral_v<I>); + m_address |= rhs; + return *this; + } + + constexpr inline uint64_t operator<<(int shift) const { + return m_address << shift; + } + + constexpr inline uint64_t operator>>(int shift) const { + return m_address >> shift; + } + + template <typename U> + constexpr inline size_t operator/(U size) const { + return m_address / size; + } + + constexpr explicit operator bool() const { + return m_address != 0; + } + + // constexpr inline uint64_t operator%(U align) const { return m_address % align; } + + // Comparison operators. + constexpr bool operator==(const TypedAddress&) const = default; + constexpr auto operator<=>(const TypedAddress&) const = default; + + // For convenience, also define comparison operators versus uint64_t. + constexpr inline bool operator==(uint64_t rhs) const { + return m_address == rhs; + } + + // Allow getting the address explicitly, for use in accessors. + constexpr inline uint64_t GetValue() const { + return m_address; + } + +private: + uint64_t m_address{}; +}; + +struct PhysicalAddressTag {}; +struct VirtualAddressTag {}; +struct ProcessAddressTag {}; + +using PhysicalAddress = TypedAddress<false, PhysicalAddressTag>; +using VirtualAddress = TypedAddress<true, VirtualAddressTag>; +using ProcessAddress = TypedAddress<true, ProcessAddressTag>; + +// Define accessors. +template <typename T> +concept IsTypedAddress = std::same_as<T, PhysicalAddress> || std::same_as<T, VirtualAddress> || + std::same_as<T, ProcessAddress>; + +template <typename T> +constexpr inline T Null = [] { + if constexpr (std::is_same<T, uint64_t>::value) { + return 0; + } else { + static_assert(std::is_same<T, PhysicalAddress>::value || + std::is_same<T, VirtualAddress>::value || + std::is_same<T, ProcessAddress>::value); + return T(0); + } +}(); + +// Basic type validations. 
+static_assert(sizeof(PhysicalAddress) == sizeof(uint64_t)); +static_assert(sizeof(VirtualAddress) == sizeof(uint64_t)); +static_assert(sizeof(ProcessAddress) == sizeof(uint64_t)); + +static_assert(std::is_trivially_copyable_v<PhysicalAddress>); +static_assert(std::is_trivially_copyable_v<VirtualAddress>); +static_assert(std::is_trivially_copyable_v<ProcessAddress>); + +static_assert(std::is_trivially_copy_constructible_v<PhysicalAddress>); +static_assert(std::is_trivially_copy_constructible_v<VirtualAddress>); +static_assert(std::is_trivially_copy_constructible_v<ProcessAddress>); + +static_assert(std::is_trivially_move_constructible_v<PhysicalAddress>); +static_assert(std::is_trivially_move_constructible_v<VirtualAddress>); +static_assert(std::is_trivially_move_constructible_v<ProcessAddress>); + +static_assert(std::is_trivially_copy_assignable_v<PhysicalAddress>); +static_assert(std::is_trivially_copy_assignable_v<VirtualAddress>); +static_assert(std::is_trivially_copy_assignable_v<ProcessAddress>); + +static_assert(std::is_trivially_move_assignable_v<PhysicalAddress>); +static_assert(std::is_trivially_move_assignable_v<VirtualAddress>); +static_assert(std::is_trivially_move_assignable_v<ProcessAddress>); + +static_assert(std::is_trivially_destructible_v<PhysicalAddress>); +static_assert(std::is_trivially_destructible_v<VirtualAddress>); +static_assert(std::is_trivially_destructible_v<ProcessAddress>); + +static_assert(Null<uint64_t> == 0); +static_assert(Null<PhysicalAddress> == Null<uint64_t>); +static_assert(Null<VirtualAddress> == Null<uint64_t>); +static_assert(Null<ProcessAddress> == Null<uint64_t>); + +// Constructor/assignment validations. +static_assert([] { + const PhysicalAddress a(5); + PhysicalAddress b(a); + return b; +}() == PhysicalAddress(5)); +static_assert([] { + const PhysicalAddress a(5); + PhysicalAddress b(10); + b = a; + return b; +}() == PhysicalAddress(5)); + +// Arithmetic validations. +static_assert(PhysicalAddress(10) + 5 == PhysicalAddress(15)); +static_assert(PhysicalAddress(10) - 5 == PhysicalAddress(5)); +static_assert([] { + PhysicalAddress v(10); + v += 5; + return v; +}() == PhysicalAddress(15)); +static_assert([] { + PhysicalAddress v(10); + v -= 5; + return v; +}() == PhysicalAddress(5)); +static_assert(PhysicalAddress(10)++ == PhysicalAddress(10)); +static_assert(++PhysicalAddress(10) == PhysicalAddress(11)); +static_assert(PhysicalAddress(10)-- == PhysicalAddress(10)); +static_assert(--PhysicalAddress(10) == PhysicalAddress(9)); + +// Logical validations. +static_assert((PhysicalAddress(0b11111111) >> 1) == 0b01111111); +static_assert((PhysicalAddress(0b10101010) >> 1) == 0b01010101); +static_assert((PhysicalAddress(0b11111111) << 1) == 0b111111110); +static_assert((PhysicalAddress(0b01010101) << 1) == 0b10101010); +static_assert((PhysicalAddress(0b11111111) & 0b01010101) == 0b01010101); +static_assert((PhysicalAddress(0b11111111) & 0b10101010) == 0b10101010); +static_assert((PhysicalAddress(0b01010101) & 0b10101010) == 0b00000000); +static_assert((PhysicalAddress(0b00000000) | 0b01010101) == 0b01010101); +static_assert((PhysicalAddress(0b11111111) | 0b01010101) == 0b11111111); +static_assert((PhysicalAddress(0b10101010) | 0b01010101) == 0b11111111); + +// Comparisons. 
+static_assert(PhysicalAddress(0) == PhysicalAddress(0)); +static_assert(PhysicalAddress(0) != PhysicalAddress(1)); +static_assert(PhysicalAddress(0) < PhysicalAddress(1)); +static_assert(PhysicalAddress(0) <= PhysicalAddress(1)); +static_assert(PhysicalAddress(1) > PhysicalAddress(0)); +static_assert(PhysicalAddress(1) >= PhysicalAddress(0)); + +static_assert(!(PhysicalAddress(0) == PhysicalAddress(1))); +static_assert(!(PhysicalAddress(0) != PhysicalAddress(0))); +static_assert(!(PhysicalAddress(1) < PhysicalAddress(0))); +static_assert(!(PhysicalAddress(1) <= PhysicalAddress(0))); +static_assert(!(PhysicalAddress(0) > PhysicalAddress(1))); +static_assert(!(PhysicalAddress(0) >= PhysicalAddress(1))); + +} // namespace Common + +template <bool Virtual, typename T> +constexpr inline uint64_t GetInteger(Common::TypedAddress<Virtual, T> address) { + return address.GetValue(); +} + +template <> +struct fmt::formatter<Common::PhysicalAddress> { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Common::PhysicalAddress& addr, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:#x}", static_cast<u64>(addr.GetValue())); + } +}; + +template <> +struct fmt::formatter<Common::ProcessAddress> { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Common::ProcessAddress& addr, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:#x}", static_cast<u64>(addr.GetValue())); + } +}; + +template <> +struct fmt::formatter<Common::VirtualAddress> { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Common::VirtualAddress& addr, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:#x}", static_cast<u64>(addr.GetValue())); + } +}; + +namespace std { + +template <> +struct hash<Common::PhysicalAddress> { + size_t operator()(const Common::PhysicalAddress& k) const noexcept { + return k.GetValue(); + } +}; + +template <> +struct hash<Common::ProcessAddress> { + size_t operator()(const Common::ProcessAddress& k) const noexcept { + return k.GetValue(); + } +}; + +template <> +struct hash<Common::VirtualAddress> { + size_t operator()(const Common::VirtualAddress& k) const noexcept { + return k.GetValue(); + } +}; + +} // namespace std diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index e54383a4a..72ed6e96c 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -144,6 +144,7 @@ static CPUCaps Detect() { caps.bmi2 = Common::Bit<8>(cpu_id[1]); caps.sha = Common::Bit<29>(cpu_id[1]); + caps.waitpkg = Common::Bit<5>(cpu_id[2]); caps.gfni = Common::Bit<8>(cpu_id[2]); __cpuidex(cpu_id, 0x00000007, 0x00000001); diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index ca8db19d6..8253944d6 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -67,6 +67,7 @@ struct CPUCaps { bool pclmulqdq : 1; bool popcnt : 1; bool sha : 1; + bool waitpkg : 1; }; /** diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp new file mode 100644 index 000000000..cfeef6a3d --- /dev/null +++ b/src/common/x64/cpu_wait.cpp @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "common/x64/cpu_detect.h" 
+#include "common/x64/cpu_wait.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} + +__forceinline static void TPAUSE() { + // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. + // For reference: + // At 1 GHz, 100K cycles is 100us + // At 2 GHz, 100K cycles is 50us + // At 4 GHz, 100K cycles is 25us + static constexpr auto PauseCycles = 100'000; + _tpause(0, FencedRDTSC() + PauseCycles); +} +#else +static u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} + +static void TPAUSE() { + // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. + // For reference: + // At 1 GHz, 100K cycles is 100us + // At 2 GHz, 100K cycles is 50us + // At 4 GHz, 100K cycles is 25us + static constexpr auto PauseCycles = 100'000; + const auto tsc = FencedRDTSC() + PauseCycles; + const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); + const auto edx = static_cast<u32>(tsc >> 32); + asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); +} +#endif + +void MicroSleep() { + static const bool has_waitpkg = GetCPUCaps().waitpkg; + + if (has_waitpkg) { + TPAUSE(); + } else { + std::this_thread::yield(); + } +} + +} // namespace Common::X64 diff --git a/src/common/x64/cpu_wait.h b/src/common/x64/cpu_wait.h new file mode 100644 index 000000000..99d3757a7 --- /dev/null +++ b/src/common/x64/cpu_wait.h @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +namespace Common::X64 { + +void MicroSleep(); + +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 76c66e7ee..277b00662 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -27,16 +27,13 @@ __forceinline static u64 FencedRDTSC() { } #else static u64 FencedRDTSC() { - u64 result; + u64 eax; + u64 edx; asm volatile("lfence\n\t" "rdtsc\n\t" - "shl $32, %%rdx\n\t" - "or %%rdx, %0\n\t" - "lfence" - : "=a"(result) - : - : "rdx", "memory", "cc"); - return result; + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; } #endif diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp index b71a41b78..cb6ec171b 100644 --- a/src/common/zstd_compression.cpp +++ b/src/common/zstd_compression.cpp @@ -33,7 +33,7 @@ std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_siz std::vector<u8> DecompressDataZSTD(std::span<const u8> compressed) { const std::size_t decompressed_size = - ZSTD_getDecompressedSize(compressed.data(), compressed.size()); + ZSTD_getFrameContentSize(compressed.data(), compressed.size()); std::vector<u8> decompressed(decompressed_size); const std::size_t uncompressed_result_size = ZSTD_decompress( |