summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/bit_util.h7
-rw-r--r--src/common/host_memory.cpp4
-rw-r--r--src/common/intrusive_red_black_tree.h391
-rw-r--r--src/common/logging/backend.cpp18
-rw-r--r--src/common/settings.cpp1
-rw-r--r--src/common/settings.h3
-rw-r--r--src/common/telemetry.cpp60
-rw-r--r--src/common/telemetry.h7
-rw-r--r--src/common/tree.h625
-rw-r--r--src/common/x64/cpu_detect.cpp124
-rw-r--r--src/common/x64/cpu_detect.h79
-rw-r--r--src/core/CMakeLists.txt5
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp16
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp17
-rw-r--r--src/core/arm/dynarmic/arm_exclusive_monitor.cpp4
-rw-r--r--src/core/arm/dynarmic/arm_exclusive_monitor.h2
-rw-r--r--src/core/arm/exclusive_monitor.h2
-rw-r--r--src/core/core.cpp4
-rw-r--r--src/core/frontend/emu_window.h11
-rw-r--r--src/core/hle/ipc_helpers.h2
-rw-r--r--src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h13
-rw-r--r--src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp12
-rw-r--r--src/core/hle/kernel/board/nintendo/nx/k_system_control.h1
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp5
-rw-r--r--src/core/hle/kernel/hle_ipc.h9
-rw-r--r--src/core/hle/kernel/init/init_slab_setup.cpp101
-rw-r--r--src/core/hle/kernel/init/init_slab_setup.h5
-rw-r--r--src/core/hle/kernel/initial_process.h23
-rw-r--r--src/core/hle/kernel/k_address_arbiter.cpp10
-rw-r--r--src/core/hle/kernel/k_condition_variable.cpp2
-rw-r--r--src/core/hle/kernel/k_memory_layout.h10
-rw-r--r--src/core/hle/kernel/k_memory_manager.cpp469
-rw-r--r--src/core/hle/kernel/k_memory_manager.h167
-rw-r--r--src/core/hle/kernel/k_memory_region_type.h10
-rw-r--r--src/core/hle/kernel/k_page_buffer.h34
-rw-r--r--src/core/hle/kernel/k_page_heap.cpp126
-rw-r--r--src/core/hle/kernel/k_page_heap.h221
-rw-r--r--src/core/hle/kernel/k_page_table.cpp335
-rw-r--r--src/core/hle/kernel/k_page_table.h38
-rw-r--r--src/core/hle/kernel/k_port.cpp7
-rw-r--r--src/core/hle/kernel/k_process.cpp194
-rw-r--r--src/core/hle/kernel/k_process.h18
-rw-r--r--src/core/hle/kernel/k_server_port.h8
-rw-r--r--src/core/hle/kernel/k_server_session.cpp15
-rw-r--r--src/core/hle/kernel/k_slab_heap.h230
-rw-r--r--src/core/hle/kernel/k_thread.cpp7
-rw-r--r--src/core/hle/kernel/k_thread.h6
-rw-r--r--src/core/hle/kernel/k_thread_local_page.cpp65
-rw-r--r--src/core/hle/kernel/k_thread_local_page.h112
-rw-r--r--src/core/hle/kernel/kernel.cpp191
-rw-r--r--src/core/hle/kernel/kernel.h26
-rw-r--r--src/core/hle/kernel/service_thread.cpp5
-rw-r--r--src/core/hle/kernel/slab_helpers.h2
-rw-r--r--src/core/hle/kernel/svc_types.h2
-rw-r--r--src/core/hle/service/am/am.cpp4
-rw-r--r--src/core/hle/service/kernel_helpers.cpp11
-rw-r--r--src/core/hle/service/ldr/ldr.cpp78
-rw-r--r--src/core/hle/service/sm/sm.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp580
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp4
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp3
-rw-r--r--src/shader_recompiler/ir_opt/rescaling_pass.cpp29
-rw-r--r--src/video_core/engines/maxwell_3d.cpp109
-rw-r--r--src/video_core/engines/maxwell_3d.h50
-rw-r--r--src/video_core/engines/maxwell_dma.cpp20
-rw-r--r--src/video_core/engines/maxwell_dma.h2
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp3
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/yuzu/configuration/config.cpp6
-rw-r--r--src/yuzu/configuration/configure_cpu.cpp9
-rw-r--r--src/yuzu/configuration/configure_cpu.h1
-rw-r--r--src/yuzu/configuration/configure_cpu.ui12
-rw-r--r--src/yuzu/configuration/configure_cpu_debug.cpp8
-rw-r--r--src/yuzu/configuration/configure_cpu_debug.ui29
-rw-r--r--src/yuzu/main.cpp4
-rw-r--r--src/yuzu_cmd/config.cpp3
-rw-r--r--src/yuzu_cmd/default_ini.h19
83 files changed, 3415 insertions, 1586 deletions
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index f50d3308a..f37538e06 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -57,4 +57,11 @@ requires std::is_integral_v<T>
return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U)));
}
+template <size_t bit_index, typename T>
+requires std::is_integral_v<T>
+[[nodiscard]] constexpr bool Bit(const T value) {
+ static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T");
+ return ((value >> bit_index) & T(1)) == T(1);
+}
+
} // namespace Common
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 28949fe5e..c465cfc14 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -327,8 +327,8 @@ private:
bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
if (it != placeholders.end() && it->lower() == virtual_offset + length) {
- const bool is_root = it == placeholders.begin() && virtual_offset == 0;
- return is_root || std::prev(it)->upper() == virtual_offset;
+ return it == placeholders.begin() ? virtual_offset == 0
+ : std::prev(it)->upper() == virtual_offset;
}
return false;
}
diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h
index 3173cc449..b296b639e 100644
--- a/src/common/intrusive_red_black_tree.h
+++ b/src/common/intrusive_red_black_tree.h
@@ -4,6 +4,8 @@
#pragma once
+#include "common/alignment.h"
+#include "common/common_funcs.h"
#include "common/parent_of_member.h"
#include "common/tree.h"
@@ -15,32 +17,33 @@ class IntrusiveRedBlackTreeImpl;
}
+#pragma pack(push, 4)
struct IntrusiveRedBlackTreeNode {
+ YUZU_NON_COPYABLE(IntrusiveRedBlackTreeNode);
+
public:
- using EntryType = RBEntry<IntrusiveRedBlackTreeNode>;
+ using RBEntry = freebsd::RBEntry<IntrusiveRedBlackTreeNode>;
- constexpr IntrusiveRedBlackTreeNode() = default;
+private:
+ RBEntry m_entry;
- void SetEntry(const EntryType& new_entry) {
- entry = new_entry;
- }
+public:
+ explicit IntrusiveRedBlackTreeNode() = default;
- [[nodiscard]] EntryType& GetEntry() {
- return entry;
+ [[nodiscard]] constexpr RBEntry& GetRBEntry() {
+ return m_entry;
}
-
- [[nodiscard]] const EntryType& GetEntry() const {
- return entry;
+ [[nodiscard]] constexpr const RBEntry& GetRBEntry() const {
+ return m_entry;
}
-private:
- EntryType entry{};
-
- friend class impl::IntrusiveRedBlackTreeImpl;
-
- template <class, class, class>
- friend class IntrusiveRedBlackTree;
+ constexpr void SetRBEntry(const RBEntry& entry) {
+ m_entry = entry;
+ }
};
+static_assert(sizeof(IntrusiveRedBlackTreeNode) ==
+ 3 * sizeof(void*) + std::max<size_t>(sizeof(freebsd::RBColor), 4));
+#pragma pack(pop)
template <class T, class Traits, class Comparator>
class IntrusiveRedBlackTree;
@@ -48,12 +51,17 @@ class IntrusiveRedBlackTree;
namespace impl {
class IntrusiveRedBlackTreeImpl {
+ YUZU_NON_COPYABLE(IntrusiveRedBlackTreeImpl);
+
private:
template <class, class, class>
friend class ::Common::IntrusiveRedBlackTree;
- using RootType = RBHead<IntrusiveRedBlackTreeNode>;
- RootType root;
+private:
+ using RootType = freebsd::RBHead<IntrusiveRedBlackTreeNode>;
+
+private:
+ RootType m_root;
public:
template <bool Const>
@@ -81,149 +89,150 @@ public:
IntrusiveRedBlackTreeImpl::reference>;
private:
- pointer node;
+ pointer m_node;
public:
- explicit Iterator(pointer n) : node(n) {}
+ constexpr explicit Iterator(pointer n) : m_node(n) {}
- bool operator==(const Iterator& rhs) const {
- return this->node == rhs.node;
+ constexpr bool operator==(const Iterator& rhs) const {
+ return m_node == rhs.m_node;
}
- bool operator!=(const Iterator& rhs) const {
+ constexpr bool operator!=(const Iterator& rhs) const {
return !(*this == rhs);
}
- pointer operator->() const {
- return this->node;
+ constexpr pointer operator->() const {
+ return m_node;
}
- reference operator*() const {
- return *this->node;
+ constexpr reference operator*() const {
+ return *m_node;
}
- Iterator& operator++() {
- this->node = GetNext(this->node);
+ constexpr Iterator& operator++() {
+ m_node = GetNext(m_node);
return *this;
}
- Iterator& operator--() {
- this->node = GetPrev(this->node);
+ constexpr Iterator& operator--() {
+ m_node = GetPrev(m_node);
return *this;
}
- Iterator operator++(int) {
+ constexpr Iterator operator++(int) {
const Iterator it{*this};
++(*this);
return it;
}
- Iterator operator--(int) {
+ constexpr Iterator operator--(int) {
const Iterator it{*this};
--(*this);
return it;
}
- operator Iterator<true>() const {
- return Iterator<true>(this->node);
+ constexpr operator Iterator<true>() const {
+ return Iterator<true>(m_node);
}
};
private:
- // Define accessors using RB_* functions.
- bool EmptyImpl() const {
- return root.IsEmpty();
+ constexpr bool EmptyImpl() const {
+ return m_root.IsEmpty();
}
- IntrusiveRedBlackTreeNode* GetMinImpl() const {
- return RB_MIN(const_cast<RootType*>(&root));
+ constexpr IntrusiveRedBlackTreeNode* GetMinImpl() const {
+ return freebsd::RB_MIN(const_cast<RootType&>(m_root));
}
- IntrusiveRedBlackTreeNode* GetMaxImpl() const {
- return RB_MAX(const_cast<RootType*>(&root));
+ constexpr IntrusiveRedBlackTreeNode* GetMaxImpl() const {
+ return freebsd::RB_MAX(const_cast<RootType&>(m_root));
}
- IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
- return RB_REMOVE(&root, node);
+ constexpr IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
+ return freebsd::RB_REMOVE(m_root, node);
}
public:
- static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
- return RB_NEXT(node);
+ static constexpr IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
+ return freebsd::RB_NEXT(node);
}
- static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
- return RB_PREV(node);
+ static constexpr IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
+ return freebsd::RB_PREV(node);
}
- static const IntrusiveRedBlackTreeNode* GetNext(const IntrusiveRedBlackTreeNode* node) {
+ static constexpr IntrusiveRedBlackTreeNode const* GetNext(
+ IntrusiveRedBlackTreeNode const* node) {
return static_cast<const IntrusiveRedBlackTreeNode*>(
GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node)));
}
- static const IntrusiveRedBlackTreeNode* GetPrev(const IntrusiveRedBlackTreeNode* node) {
+ static constexpr IntrusiveRedBlackTreeNode const* GetPrev(
+ IntrusiveRedBlackTreeNode const* node) {
return static_cast<const IntrusiveRedBlackTreeNode*>(
GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node)));
}
public:
- constexpr IntrusiveRedBlackTreeImpl() {}
+ constexpr IntrusiveRedBlackTreeImpl() = default;
// Iterator accessors.
- iterator begin() {
+ constexpr iterator begin() {
return iterator(this->GetMinImpl());
}
- const_iterator begin() const {
+ constexpr const_iterator begin() const {
return const_iterator(this->GetMinImpl());
}
- iterator end() {
+ constexpr iterator end() {
return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr));
}
- const_iterator end() const {
+ constexpr const_iterator end() const {
return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr));
}
- const_iterator cbegin() const {
+ constexpr const_iterator cbegin() const {
return this->begin();
}
- const_iterator cend() const {
+ constexpr const_iterator cend() const {
return this->end();
}
- iterator iterator_to(reference ref) {
- return iterator(&ref);
+ constexpr iterator iterator_to(reference ref) {
+ return iterator(std::addressof(ref));
}
- const_iterator iterator_to(const_reference ref) const {
- return const_iterator(&ref);
+ constexpr const_iterator iterator_to(const_reference ref) const {
+ return const_iterator(std::addressof(ref));
}
// Content management.
- bool empty() const {
+ constexpr bool empty() const {
return this->EmptyImpl();
}
- reference back() {
+ constexpr reference back() {
return *this->GetMaxImpl();
}
- const_reference back() const {
+ constexpr const_reference back() const {
return *this->GetMaxImpl();
}
- reference front() {
+ constexpr reference front() {
return *this->GetMinImpl();
}
- const_reference front() const {
+ constexpr const_reference front() const {
return *this->GetMinImpl();
}
- iterator erase(iterator it) {
+ constexpr iterator erase(iterator it) {
auto cur = std::addressof(*it);
auto next = GetNext(cur);
this->RemoveImpl(cur);
@@ -234,16 +243,16 @@ public:
} // namespace impl
template <typename T>
-concept HasLightCompareType = requires {
- { std::is_same<typename T::LightCompareType, void>::value } -> std::convertible_to<bool>;
+concept HasRedBlackKeyType = requires {
+ { std::is_same<typename T::RedBlackKeyType, void>::value } -> std::convertible_to<bool>;
};
namespace impl {
template <typename T, typename Default>
- consteval auto* GetLightCompareType() {
- if constexpr (HasLightCompareType<T>) {
- return static_cast<typename T::LightCompareType*>(nullptr);
+ consteval auto* GetRedBlackKeyType() {
+ if constexpr (HasRedBlackKeyType<T>) {
+ return static_cast<typename T::RedBlackKeyType*>(nullptr);
} else {
return static_cast<Default*>(nullptr);
}
@@ -252,16 +261,17 @@ namespace impl {
} // namespace impl
template <typename T, typename Default>
-using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>;
+using RedBlackKeyType = std::remove_pointer_t<decltype(impl::GetRedBlackKeyType<T, Default>())>;
template <class T, class Traits, class Comparator>
class IntrusiveRedBlackTree {
+ YUZU_NON_COPYABLE(IntrusiveRedBlackTree);
public:
using ImplType = impl::IntrusiveRedBlackTreeImpl;
private:
- ImplType impl{};
+ ImplType m_impl;
public:
template <bool Const>
@@ -277,9 +287,9 @@ public:
using iterator = Iterator<false>;
using const_iterator = Iterator<true>;
- using light_value_type = LightCompareType<Comparator, value_type>;
- using const_light_pointer = const light_value_type*;
- using const_light_reference = const light_value_type&;
+ using key_type = RedBlackKeyType<Comparator, value_type>;
+ using const_key_pointer = const key_type*;
+ using const_key_reference = const key_type&;
template <bool Const>
class Iterator {
@@ -298,183 +308,201 @@ public:
IntrusiveRedBlackTree::reference>;
private:
- ImplIterator iterator;
+ ImplIterator m_impl;
private:
- explicit Iterator(ImplIterator it) : iterator(it) {}
+ constexpr explicit Iterator(ImplIterator it) : m_impl(it) {}
- explicit Iterator(typename std::conditional<Const, ImplType::const_iterator,
- ImplType::iterator>::type::pointer ptr)
- : iterator(ptr) {}
+ constexpr explicit Iterator(typename ImplIterator::pointer p) : m_impl(p) {}
- ImplIterator GetImplIterator() const {
- return this->iterator;
+ constexpr ImplIterator GetImplIterator() const {
+ return m_impl;
}
public:
- bool operator==(const Iterator& rhs) const {
- return this->iterator == rhs.iterator;
+ constexpr bool operator==(const Iterator& rhs) const {
+ return m_impl == rhs.m_impl;
}
- bool operator!=(const Iterator& rhs) const {
+ constexpr bool operator!=(const Iterator& rhs) const {
return !(*this == rhs);
}
- pointer operator->() const {
- return Traits::GetParent(std::addressof(*this->iterator));
+ constexpr pointer operator->() const {
+ return Traits::GetParent(std::addressof(*m_impl));
}
- reference operator*() const {
- return *Traits::GetParent(std::addressof(*this->iterator));
+ constexpr reference operator*() const {
+ return *Traits::GetParent(std::addressof(*m_impl));
}
- Iterator& operator++() {
- ++this->iterator;
+ constexpr Iterator& operator++() {
+ ++m_impl;
return *this;
}
- Iterator& operator--() {
- --this->iterator;
+ constexpr Iterator& operator--() {
+ --m_impl;
return *this;
}
- Iterator operator++(int) {
+ constexpr Iterator operator++(int) {
const Iterator it{*this};
- ++this->iterator;
+ ++m_impl;
return it;
}
- Iterator operator--(int) {
+ constexpr Iterator operator--(int) {
const Iterator it{*this};
- --this->iterator;
+ --m_impl;
return it;
}
- operator Iterator<true>() const {
- return Iterator<true>(this->iterator);
+ constexpr operator Iterator<true>() const {
+ return Iterator<true>(m_impl);
}
};
private:
- static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
- const IntrusiveRedBlackTreeNode* rhs) {
+ static constexpr int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
+ const IntrusiveRedBlackTreeNode* rhs) {
return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs));
}
- static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) {
- return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs));
+ static constexpr int CompareKeyImpl(const_key_reference key,
+ const IntrusiveRedBlackTreeNode* rhs) {
+ return Comparator::Compare(key, *Traits::GetParent(rhs));
}
// Define accessors using RB_* functions.
- IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
- return RB_INSERT(&impl.root, node, CompareImpl);
+ constexpr IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
+ return freebsd::RB_INSERT(m_impl.m_root, node, CompareImpl);
}
- IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const {
- return RB_FIND(const_cast<ImplType::RootType*>(&impl.root),
- const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+ constexpr IntrusiveRedBlackTreeNode* FindImpl(IntrusiveRedBlackTreeNode const* node) const {
+ return freebsd::RB_FIND(const_cast<ImplType::RootType&>(m_impl.m_root),
+ const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
}
- IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const {
- return RB_NFIND(const_cast<ImplType::RootType*>(&impl.root),
- const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+ constexpr IntrusiveRedBlackTreeNode* NFindImpl(IntrusiveRedBlackTreeNode const* node) const {
+ return freebsd::RB_NFIND(const_cast<ImplType::RootType&>(m_impl.m_root),
+ const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
}
- IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const {
- return RB_FIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root),
- static_cast<const void*>(lelm), LightCompareImpl);
+ constexpr IntrusiveRedBlackTreeNode* FindKeyImpl(const_key_reference key) const {
+ return freebsd::RB_FIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+ CompareKeyImpl);
}
- IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const {
- return RB_NFIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root),
- static_cast<const void*>(lelm), LightCompareImpl);
+ constexpr IntrusiveRedBlackTreeNode* NFindKeyImpl(const_key_reference key) const {
+ return freebsd::RB_NFIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+ CompareKeyImpl);
+ }
+
+ constexpr IntrusiveRedBlackTreeNode* FindExistingImpl(
+ IntrusiveRedBlackTreeNode const* node) const {
+ return freebsd::RB_FIND_EXISTING(const_cast<ImplType::RootType&>(m_impl.m_root),
+ const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
+ }
+
+ constexpr IntrusiveRedBlackTreeNode* FindExistingKeyImpl(const_key_reference key) const {
+ return freebsd::RB_FIND_EXISTING_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
+ CompareKeyImpl);
}
public:
constexpr IntrusiveRedBlackTree() = default;
// Iterator accessors.
- iterator begin() {
- return iterator(this->impl.begin());
+ constexpr iterator begin() {
+ return iterator(m_impl.begin());
}
- const_iterator begin() const {
- return const_iterator(this->impl.begin());
+ constexpr const_iterator begin() const {
+ return const_iterator(m_impl.begin());
}
- iterator end() {
- return iterator(this->impl.end());
+ constexpr iterator end() {
+ return iterator(m_impl.end());
}
- const_iterator end() const {
- return const_iterator(this->impl.end());
+ constexpr const_iterator end() const {
+ return const_iterator(m_impl.end());
}
- const_iterator cbegin() const {
+ constexpr const_iterator cbegin() const {
return this->begin();
}
- const_iterator cend() const {
+ constexpr const_iterator cend() const {
return this->end();
}
- iterator iterator_to(reference ref) {
- return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+ constexpr iterator iterator_to(reference ref) {
+ return iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
}
- const_iterator iterator_to(const_reference ref) const {
- return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
+ constexpr const_iterator iterator_to(const_reference ref) const {
+ return const_iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
}
// Content management.
- bool empty() const {
- return this->impl.empty();
+ constexpr bool empty() const {
+ return m_impl.empty();
}
- reference back() {
- return *Traits::GetParent(std::addressof(this->impl.back()));
+ constexpr reference back() {
+ return *Traits::GetParent(std::addressof(m_impl.back()));
}
- const_reference back() const {
- return *Traits::GetParent(std::addressof(this->impl.back()));
+ constexpr const_reference back() const {
+ return *Traits::GetParent(std::addressof(m_impl.back()));
}
- reference front() {
- return *Traits::GetParent(std::addressof(this->impl.front()));
+ constexpr reference front() {
+ return *Traits::GetParent(std::addressof(m_impl.front()));
}
- const_reference front() const {
- return *Traits::GetParent(std::addressof(this->impl.front()));
+ constexpr const_reference front() const {
+ return *Traits::GetParent(std::addressof(m_impl.front()));
}
- iterator erase(iterator it) {
- return iterator(this->impl.erase(it.GetImplIterator()));
+ constexpr iterator erase(iterator it) {
+ return iterator(m_impl.erase(it.GetImplIterator()));
}
- iterator insert(reference ref) {
+ constexpr iterator insert(reference ref) {
ImplType::pointer node = Traits::GetNode(std::addressof(ref));
this->InsertImpl(node);
return iterator(node);
}
- iterator find(const_reference ref) const {
+ constexpr iterator find(const_reference ref) const {
return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref))));
}
- iterator nfind(const_reference ref) const {
+ constexpr iterator nfind(const_reference ref) const {
return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref))));
}
- iterator find_light(const_light_reference ref) const {
- return iterator(this->FindLightImpl(std::addressof(ref)));
+ constexpr iterator find_key(const_key_reference ref) const {
+ return iterator(this->FindKeyImpl(ref));
+ }
+
+ constexpr iterator nfind_key(const_key_reference ref) const {
+ return iterator(this->NFindKeyImpl(ref));
+ }
+
+ constexpr iterator find_existing(const_reference ref) const {
+ return iterator(this->FindExistingImpl(Traits::GetNode(std::addressof(ref))));
}
- iterator nfind_light(const_light_reference ref) const {
- return iterator(this->NFindLightImpl(std::addressof(ref)));
+ constexpr iterator find_existing_key(const_key_reference ref) const {
+ return iterator(this->FindExistingKeyImpl(ref));
}
};
-template <auto T, class Derived = impl::GetParentType<T>>
+template <auto T, class Derived = Common::impl::GetParentType<T>>
class IntrusiveRedBlackTreeMemberTraits;
template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -498,19 +526,16 @@ private:
return std::addressof(parent->*Member);
}
- static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
- return GetParentPointer<Member, Derived>(node);
+ static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+ return Common::GetParentPointer<Member, Derived>(node);
}
- static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
- return GetParentPointer<Member, Derived>(node);
+ static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+ return Common::GetParentPointer<Member, Derived>(node);
}
-
-private:
- static constexpr TypedStorage<Derived> DerivedStorage = {};
};
-template <auto T, class Derived = impl::GetParentType<T>>
+template <auto T, class Derived = Common::impl::GetParentType<T>>
class IntrusiveRedBlackTreeMemberTraitsDeferredAssert;
template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -521,11 +546,6 @@ public:
IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>;
using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
- static constexpr bool IsValid() {
- TypedStorage<Derived> DerivedStorage = {};
- return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage);
- }
-
private:
template <class, class, class>
friend class IntrusiveRedBlackTree;
@@ -540,30 +560,36 @@ private:
return std::addressof(parent->*Member);
}
- static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
- return GetParentPointer<Member, Derived>(node);
+ static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
+ return Common::GetParentPointer<Member, Derived>(node);
}
- static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
- return GetParentPointer<Member, Derived>(node);
+ static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+ return Common::GetParentPointer<Member, Derived>(node);
}
};
template <class Derived>
-class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
+class alignas(void*) IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
public:
+ using IntrusiveRedBlackTreeNode::IntrusiveRedBlackTreeNode;
+
constexpr Derived* GetPrev() {
- return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+ return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
+ impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
}
constexpr const Derived* GetPrev() const {
- return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this));
+ return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
+ impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
}
constexpr Derived* GetNext() {
- return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+ return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
+ impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
}
constexpr const Derived* GetNext() const {
- return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this));
+ return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
+ impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
}
};
@@ -581,19 +607,22 @@ private:
friend class impl::IntrusiveRedBlackTreeImpl;
static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
- return static_cast<IntrusiveRedBlackTreeNode*>(parent);
+ return static_cast<IntrusiveRedBlackTreeNode*>(
+ static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
}
static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
- return static_cast<const IntrusiveRedBlackTreeNode*>(parent);
+ return static_cast<const IntrusiveRedBlackTreeNode*>(
+ static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
}
static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
- return static_cast<Derived*>(node);
+ return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
}
- static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) {
- return static_cast<const Derived*>(node);
+ static constexpr Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
+ return static_cast<const Derived*>(
+ static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
}
};
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index c51c05b28..4a2462ec4 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -218,19 +218,17 @@ private:
Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
: filter{filter_}, file_backend{file_backend_filename} {}
- ~Impl() {
- StopBackendThread();
- }
+ ~Impl() = default;
void StartBackendThread() {
- backend_thread = std::thread([this] {
+ backend_thread = std::jthread([this](std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:Log");
Entry entry;
const auto write_logs = [this, &entry]() {
ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
};
- while (!stop.stop_requested()) {
- entry = message_queue.PopWait(stop.get_token());
+ while (!stop_token.stop_requested()) {
+ entry = message_queue.PopWait(stop_token);
if (entry.filename != nullptr) {
write_logs();
}
@@ -244,11 +242,6 @@ private:
});
}
- void StopBackendThread() {
- stop.request_stop();
- backend_thread.join();
- }
-
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string&& message) const {
using std::chrono::duration_cast;
@@ -283,10 +276,9 @@ private:
ColorConsoleBackend color_console_backend{};
FileBackend file_backend;
- std::stop_source stop;
- std::thread backend_thread;
MPSCQueue<Entry, true> message_queue{};
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
+ std::jthread backend_thread;
};
} // namespace
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 2810cec15..877e0faa4 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
+ values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
// Renderer
values.renderer_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index d06b23a14..a37d83fb3 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -484,12 +484,15 @@ struct Values {
BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
+ BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
+ BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
+ Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
// Renderer
RangedSetting<RendererBackend> renderer_backend{
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 6241d08b3..98c82cd17 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) {
void AppendCPUInfo(FieldCollection& fc) {
#ifdef ARCHITECTURE_x86_64
- fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
- fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1);
- fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2);
+
+ const auto& caps = Common::GetCPUCaps();
+ const auto add_field = [&fc](std::string_view field_name, const auto& field_value) {
+ fc.AddField(FieldType::UserSystem, field_name, field_value);
+ };
+ add_field("CPU_Model", caps.cpu_string);
+ add_field("CPU_BrandString", caps.brand_string);
+
+ add_field("CPU_Extension_x64_SSE", caps.sse);
+ add_field("CPU_Extension_x64_SSE2", caps.sse2);
+ add_field("CPU_Extension_x64_SSE3", caps.sse3);
+ add_field("CPU_Extension_x64_SSSE3", caps.ssse3);
+ add_field("CPU_Extension_x64_SSE41", caps.sse4_1);
+ add_field("CPU_Extension_x64_SSE42", caps.sse4_2);
+
+ add_field("CPU_Extension_x64_AVX", caps.avx);
+ add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni);
+ add_field("CPU_Extension_x64_AVX2", caps.avx2);
+
+ // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field
+ add_field("CPU_Extension_x64_AVX512",
+ caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw);
+
+ add_field("CPU_Extension_x64_AVX512F", caps.avx512f);
+ add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd);
+ add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl);
+ add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq);
+ add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw);
+ add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg);
+ add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi);
+
+ add_field("CPU_Extension_x64_AES", caps.aes);
+ add_field("CPU_Extension_x64_BMI1", caps.bmi1);
+ add_field("CPU_Extension_x64_BMI2", caps.bmi2);
+ add_field("CPU_Extension_x64_F16C", caps.f16c);
+ add_field("CPU_Extension_x64_FMA", caps.fma);
+ add_field("CPU_Extension_x64_FMA4", caps.fma4);
+ add_field("CPU_Extension_x64_GFNI", caps.gfni);
+ add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
+ add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
+ add_field("CPU_Extension_x64_MOVBE", caps.movbe);
+ add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
+ add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
+ add_field("CPU_Extension_x64_SHA", caps.sha);
#else
fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
#endif
diff --git a/src/common/telemetry.h b/src/common/telemetry.h
index 4d632f7eb..3524c857e 100644
--- a/src/common/telemetry.h
+++ b/src/common/telemetry.h
@@ -8,6 +8,7 @@
#include <map>
#include <memory>
#include <string>
+#include <string_view>
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -55,8 +56,8 @@ class Field : public FieldInterface {
public:
YUZU_NON_COPYABLE(Field);
- Field(FieldType type_, std::string name_, T value_)
- : name(std::move(name_)), type(type_), value(std::move(value_)) {}
+ Field(FieldType type_, std::string_view name_, T value_)
+ : name(name_), type(type_), value(std::move(value_)) {}
~Field() override = default;
@@ -123,7 +124,7 @@ public:
* @param value Value for the field to add.
*/
template <typename T>
- void AddField(FieldType type, const char* name, T value) {
+ void AddField(FieldType type, std::string_view name, T value) {
return AddField(std::make_unique<Field<T>>(type, name, std::move(value)));
}
diff --git a/src/common/tree.h b/src/common/tree.h
index 18faa4a48..28370e343 100644
--- a/src/common/tree.h
+++ b/src/common/tree.h
@@ -43,294 +43,265 @@
* The maximum height of a red-black tree is 2lg (n+1).
*/
-#include "common/assert.h"
+namespace Common::freebsd {
-namespace Common {
+enum class RBColor {
+ RB_BLACK = 0,
+ RB_RED = 1,
+};
+
+#pragma pack(push, 4)
template <typename T>
-class RBHead {
+class RBEntry {
public:
- [[nodiscard]] T* Root() {
- return rbh_root;
- }
+ constexpr RBEntry() = default;
- [[nodiscard]] const T* Root() const {
- return rbh_root;
+ [[nodiscard]] constexpr T* Left() {
+ return m_rbe_left;
}
-
- void SetRoot(T* root) {
- rbh_root = root;
+ [[nodiscard]] constexpr const T* Left() const {
+ return m_rbe_left;
}
- [[nodiscard]] bool IsEmpty() const {
- return Root() == nullptr;
+ constexpr void SetLeft(T* e) {
+ m_rbe_left = e;
}
-private:
- T* rbh_root = nullptr;
-};
-
-enum class EntryColor {
- Black,
- Red,
-};
-
-template <typename T>
-class RBEntry {
-public:
- [[nodiscard]] T* Left() {
- return rbe_left;
+ [[nodiscard]] constexpr T* Right() {
+ return m_rbe_right;
}
-
- [[nodiscard]] const T* Left() const {
- return rbe_left;
+ [[nodiscard]] constexpr const T* Right() const {
+ return m_rbe_right;
}
- void SetLeft(T* left) {
- rbe_left = left;
+ constexpr void SetRight(T* e) {
+ m_rbe_right = e;
}
- [[nodiscard]] T* Right() {
- return rbe_right;
+ [[nodiscard]] constexpr T* Parent() {
+ return m_rbe_parent;
}
-
- [[nodiscard]] const T* Right() const {
- return rbe_right;
+ [[nodiscard]] constexpr const T* Parent() const {
+ return m_rbe_parent;
}
- void SetRight(T* right) {
- rbe_right = right;
+ constexpr void SetParent(T* e) {
+ m_rbe_parent = e;
}
- [[nodiscard]] T* Parent() {
- return rbe_parent;
+ [[nodiscard]] constexpr bool IsBlack() const {
+ return m_rbe_color == RBColor::RB_BLACK;
}
-
- [[nodiscard]] const T* Parent() const {
- return rbe_parent;
+ [[nodiscard]] constexpr bool IsRed() const {
+ return m_rbe_color == RBColor::RB_RED;
}
-
- void SetParent(T* parent) {
- rbe_parent = parent;
+ [[nodiscard]] constexpr RBColor Color() const {
+ return m_rbe_color;
}
- [[nodiscard]] bool IsBlack() const {
- return rbe_color == EntryColor::Black;
+ constexpr void SetColor(RBColor c) {
+ m_rbe_color = c;
}
- [[nodiscard]] bool IsRed() const {
- return rbe_color == EntryColor::Red;
- }
+private:
+ T* m_rbe_left{};
+ T* m_rbe_right{};
+ T* m_rbe_parent{};
+ RBColor m_rbe_color{RBColor::RB_BLACK};
+};
+#pragma pack(pop)
- [[nodiscard]] EntryColor Color() const {
- return rbe_color;
- }
+template <typename T>
+struct CheckRBEntry {
+ static constexpr bool value = false;
+};
+template <typename T>
+struct CheckRBEntry<RBEntry<T>> {
+ static constexpr bool value = true;
+};
- void SetColor(EntryColor color) {
- rbe_color = color;
- }
+template <typename T>
+concept IsRBEntry = CheckRBEntry<T>::value;
+template <typename T>
+concept HasRBEntry = requires(T& t, const T& ct) {
+ { t.GetRBEntry() } -> std::same_as<RBEntry<T>&>;
+ { ct.GetRBEntry() } -> std::same_as<const RBEntry<T>&>;
+};
+
+template <typename T>
+requires HasRBEntry<T>
+class RBHead {
private:
- T* rbe_left = nullptr;
- T* rbe_right = nullptr;
- T* rbe_parent = nullptr;
- EntryColor rbe_color{};
+ T* m_rbh_root = nullptr;
+
+public:
+ [[nodiscard]] constexpr T* Root() {
+ return m_rbh_root;
+ }
+ [[nodiscard]] constexpr const T* Root() const {
+ return m_rbh_root;
+ }
+ constexpr void SetRoot(T* root) {
+ m_rbh_root = root;
+ }
+
+ [[nodiscard]] constexpr bool IsEmpty() const {
+ return this->Root() == nullptr;
+ }
};
-template <typename Node>
-[[nodiscard]] RBEntry<Node>& RB_ENTRY(Node* node) {
- return node->GetEntry();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr RBEntry<T>& RB_ENTRY(T* t) {
+ return t->GetRBEntry();
}
-
-template <typename Node>
-[[nodiscard]] const RBEntry<Node>& RB_ENTRY(const Node* node) {
- return node->GetEntry();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const RBEntry<T>& RB_ENTRY(const T* t) {
+ return t->GetRBEntry();
}
-template <typename Node>
-[[nodiscard]] Node* RB_PARENT(Node* node) {
- return RB_ENTRY(node).Parent();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_LEFT(T* t) {
+ return RB_ENTRY(t).Left();
}
-
-template <typename Node>
-[[nodiscard]] const Node* RB_PARENT(const Node* node) {
- return RB_ENTRY(node).Parent();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_LEFT(const T* t) {
+ return RB_ENTRY(t).Left();
}
-template <typename Node>
-void RB_SET_PARENT(Node* node, Node* parent) {
- return RB_ENTRY(node).SetParent(parent);
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_RIGHT(T* t) {
+ return RB_ENTRY(t).Right();
}
-
-template <typename Node>
-[[nodiscard]] Node* RB_LEFT(Node* node) {
- return RB_ENTRY(node).Left();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_RIGHT(const T* t) {
+ return RB_ENTRY(t).Right();
}
-template <typename Node>
-[[nodiscard]] const Node* RB_LEFT(const Node* node) {
- return RB_ENTRY(node).Left();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr T* RB_PARENT(T* t) {
+ return RB_ENTRY(t).Parent();
}
-
-template <typename Node>
-void RB_SET_LEFT(Node* node, Node* left) {
- return RB_ENTRY(node).SetLeft(left);
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr const T* RB_PARENT(const T* t) {
+ return RB_ENTRY(t).Parent();
}
-template <typename Node>
-[[nodiscard]] Node* RB_RIGHT(Node* node) {
- return RB_ENTRY(node).Right();
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_LEFT(T* t, T* e) {
+ RB_ENTRY(t).SetLeft(e);
}
-
-template <typename Node>
-[[nodiscard]] const Node* RB_RIGHT(const Node* node) {
- return RB_ENTRY(node).Right();
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_RIGHT(T* t, T* e) {
+ RB_ENTRY(t).SetRight(e);
}
-
-template <typename Node>
-void RB_SET_RIGHT(Node* node, Node* right) {
- return RB_ENTRY(node).SetRight(right);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_PARENT(T* t, T* e) {
+ RB_ENTRY(t).SetParent(e);
}
-template <typename Node>
-[[nodiscard]] bool RB_IS_BLACK(const Node* node) {
- return RB_ENTRY(node).IsBlack();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr bool RB_IS_BLACK(const T* t) {
+ return RB_ENTRY(t).IsBlack();
}
-
-template <typename Node>
-[[nodiscard]] bool RB_IS_RED(const Node* node) {
- return RB_ENTRY(node).IsRed();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr bool RB_IS_RED(const T* t) {
+ return RB_ENTRY(t).IsRed();
}
-template <typename Node>
-[[nodiscard]] EntryColor RB_COLOR(const Node* node) {
- return RB_ENTRY(node).Color();
+template <typename T>
+requires HasRBEntry<T>
+[[nodiscard]] constexpr RBColor RB_COLOR(const T* t) {
+ return RB_ENTRY(t).Color();
}
-template <typename Node>
-void RB_SET_COLOR(Node* node, EntryColor color) {
- return RB_ENTRY(node).SetColor(color);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_COLOR(T* t, RBColor c) {
+ RB_ENTRY(t).SetColor(c);
}
-template <typename Node>
-void RB_SET(Node* node, Node* parent) {
- auto& entry = RB_ENTRY(node);
- entry.SetParent(parent);
- entry.SetLeft(nullptr);
- entry.SetRight(nullptr);
- entry.SetColor(EntryColor::Red);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET(T* elm, T* parent) {
+ auto& rb_entry = RB_ENTRY(elm);
+ rb_entry.SetParent(parent);
+ rb_entry.SetLeft(nullptr);
+ rb_entry.SetRight(nullptr);
+ rb_entry.SetColor(RBColor::RB_RED);
}
-template <typename Node>
-void RB_SET_BLACKRED(Node* black, Node* red) {
- RB_SET_COLOR(black, EntryColor::Black);
- RB_SET_COLOR(red, EntryColor::Red);
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_SET_BLACKRED(T* black, T* red) {
+ RB_SET_COLOR(black, RBColor::RB_BLACK);
+ RB_SET_COLOR(red, RBColor::RB_RED);
}
-template <typename Node>
-void RB_ROTATE_LEFT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_ROTATE_LEFT(RBHead<T>& head, T* elm, T*& tmp) {
tmp = RB_RIGHT(elm);
- RB_SET_RIGHT(elm, RB_LEFT(tmp));
- if (RB_RIGHT(elm) != nullptr) {
+ if (RB_SET_RIGHT(elm, RB_LEFT(tmp)); RB_RIGHT(elm) != nullptr) {
RB_SET_PARENT(RB_LEFT(tmp), elm);
}
- RB_SET_PARENT(tmp, RB_PARENT(elm));
- if (RB_PARENT(tmp) != nullptr) {
+ if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
if (elm == RB_LEFT(RB_PARENT(elm))) {
RB_SET_LEFT(RB_PARENT(elm), tmp);
} else {
RB_SET_RIGHT(RB_PARENT(elm), tmp);
}
} else {
- head->SetRoot(tmp);
+ head.SetRoot(tmp);
}
RB_SET_LEFT(tmp, elm);
RB_SET_PARENT(elm, tmp);
}
-template <typename Node>
-void RB_ROTATE_RIGHT(RBHead<Node>* head, Node* elm, Node*& tmp) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_ROTATE_RIGHT(RBHead<T>& head, T* elm, T*& tmp) {
tmp = RB_LEFT(elm);
- RB_SET_LEFT(elm, RB_RIGHT(tmp));
- if (RB_LEFT(elm) != nullptr) {
+ if (RB_SET_LEFT(elm, RB_RIGHT(tmp)); RB_LEFT(elm) != nullptr) {
RB_SET_PARENT(RB_RIGHT(tmp), elm);
}
- RB_SET_PARENT(tmp, RB_PARENT(elm));
- if (RB_PARENT(tmp) != nullptr) {
+ if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
if (elm == RB_LEFT(RB_PARENT(elm))) {
RB_SET_LEFT(RB_PARENT(elm), tmp);
} else {
RB_SET_RIGHT(RB_PARENT(elm), tmp);
}
} else {
- head->SetRoot(tmp);
+ head.SetRoot(tmp);
}
RB_SET_RIGHT(tmp, elm);
RB_SET_PARENT(elm, tmp);
}
-template <typename Node>
-void RB_INSERT_COLOR(RBHead<Node>* head, Node* elm) {
- Node* parent = nullptr;
- Node* tmp = nullptr;
-
- while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
- Node* gparent = RB_PARENT(parent);
- if (parent == RB_LEFT(gparent)) {
- tmp = RB_RIGHT(gparent);
- if (tmp && RB_IS_RED(tmp)) {
- RB_SET_COLOR(tmp, EntryColor::Black);
- RB_SET_BLACKRED(parent, gparent);
- elm = gparent;
- continue;
- }
-
- if (RB_RIGHT(parent) == elm) {
- RB_ROTATE_LEFT(head, parent, tmp);
- tmp = parent;
- parent = elm;
- elm = tmp;
- }
-
- RB_SET_BLACKRED(parent, gparent);
- RB_ROTATE_RIGHT(head, gparent, tmp);
- } else {
- tmp = RB_LEFT(gparent);
- if (tmp && RB_IS_RED(tmp)) {
- RB_SET_COLOR(tmp, EntryColor::Black);
- RB_SET_BLACKRED(parent, gparent);
- elm = gparent;
- continue;
- }
-
- if (RB_LEFT(parent) == elm) {
- RB_ROTATE_RIGHT(head, parent, tmp);
- tmp = parent;
- parent = elm;
- elm = tmp;
- }
-
- RB_SET_BLACKRED(parent, gparent);
- RB_ROTATE_LEFT(head, gparent, tmp);
- }
- }
-
- RB_SET_COLOR(head->Root(), EntryColor::Black);
-}
-
-template <typename Node>
-void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
- Node* tmp;
- while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head->Root() && parent != nullptr) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_REMOVE_COLOR(RBHead<T>& head, T* parent, T* elm) {
+ T* tmp;
+ while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head.Root()) {
if (RB_LEFT(parent) == elm) {
tmp = RB_RIGHT(parent);
- if (!tmp) {
- ASSERT_MSG(false, "tmp is invalid!");
- break;
- }
if (RB_IS_RED(tmp)) {
RB_SET_BLACKRED(tmp, parent);
RB_ROTATE_LEFT(head, parent, tmp);
@@ -339,29 +310,29 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
(RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
- RB_SET_COLOR(tmp, EntryColor::Red);
+ RB_SET_COLOR(tmp, RBColor::RB_RED);
elm = parent;
parent = RB_PARENT(elm);
} else {
if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) {
- Node* oleft;
+ T* oleft;
if ((oleft = RB_LEFT(tmp)) != nullptr) {
- RB_SET_COLOR(oleft, EntryColor::Black);
+ RB_SET_COLOR(oleft, RBColor::RB_BLACK);
}
- RB_SET_COLOR(tmp, EntryColor::Red);
+ RB_SET_COLOR(tmp, RBColor::RB_RED);
RB_ROTATE_RIGHT(head, tmp, oleft);
tmp = RB_RIGHT(parent);
}
RB_SET_COLOR(tmp, RB_COLOR(parent));
- RB_SET_COLOR(parent, EntryColor::Black);
+ RB_SET_COLOR(parent, RBColor::RB_BLACK);
if (RB_RIGHT(tmp)) {
- RB_SET_COLOR(RB_RIGHT(tmp), EntryColor::Black);
+ RB_SET_COLOR(RB_RIGHT(tmp), RBColor::RB_BLACK);
}
RB_ROTATE_LEFT(head, parent, tmp);
- elm = head->Root();
+ elm = head.Root();
break;
}
} else {
@@ -372,68 +343,56 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
tmp = RB_LEFT(parent);
}
- if (!tmp) {
- ASSERT_MSG(false, "tmp is invalid!");
- break;
- }
-
if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
(RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
- RB_SET_COLOR(tmp, EntryColor::Red);
+ RB_SET_COLOR(tmp, RBColor::RB_RED);
elm = parent;
parent = RB_PARENT(elm);
} else {
if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) {
- Node* oright;
+ T* oright;
if ((oright = RB_RIGHT(tmp)) != nullptr) {
- RB_SET_COLOR(oright, EntryColor::Black);
+ RB_SET_COLOR(oright, RBColor::RB_BLACK);
}
- RB_SET_COLOR(tmp, EntryColor::Red);
+ RB_SET_COLOR(tmp, RBColor::RB_RED);
RB_ROTATE_LEFT(head, tmp, oright);
tmp = RB_LEFT(parent);
}
RB_SET_COLOR(tmp, RB_COLOR(parent));
- RB_SET_COLOR(parent, EntryColor::Black);
+ RB_SET_COLOR(parent, RBColor::RB_BLACK);
if (RB_LEFT(tmp)) {
- RB_SET_COLOR(RB_LEFT(tmp), EntryColor::Black);
+ RB_SET_COLOR(RB_LEFT(tmp), RBColor::RB_BLACK);
}
RB_ROTATE_RIGHT(head, parent, tmp);
- elm = head->Root();
+ elm = head.Root();
break;
}
}
}
if (elm) {
- RB_SET_COLOR(elm, EntryColor::Black);
+ RB_SET_COLOR(elm, RBColor::RB_BLACK);
}
}
-template <typename Node>
-Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
- Node* child = nullptr;
- Node* parent = nullptr;
- Node* old = elm;
- EntryColor color{};
-
- const auto finalize = [&] {
- if (color == EntryColor::Black) {
- RB_REMOVE_COLOR(head, parent, child);
- }
-
- return old;
- };
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_REMOVE(RBHead<T>& head, T* elm) {
+ T* child = nullptr;
+ T* parent = nullptr;
+ T* old = elm;
+ RBColor color = RBColor::RB_BLACK;
if (RB_LEFT(elm) == nullptr) {
child = RB_RIGHT(elm);
} else if (RB_RIGHT(elm) == nullptr) {
child = RB_LEFT(elm);
} else {
- Node* left;
+ T* left;
elm = RB_RIGHT(elm);
while ((left = RB_LEFT(elm)) != nullptr) {
elm = left;
@@ -446,6 +405,7 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
if (child) {
RB_SET_PARENT(child, parent);
}
+
if (parent) {
if (RB_LEFT(parent) == elm) {
RB_SET_LEFT(parent, child);
@@ -453,14 +413,14 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
RB_SET_RIGHT(parent, child);
}
} else {
- head->SetRoot(child);
+ head.SetRoot(child);
}
if (RB_PARENT(elm) == old) {
parent = elm;
}
- elm->SetEntry(old->GetEntry());
+ elm->SetRBEntry(old->GetRBEntry());
if (RB_PARENT(old)) {
if (RB_LEFT(RB_PARENT(old)) == old) {
@@ -469,17 +429,24 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
RB_SET_RIGHT(RB_PARENT(old), elm);
}
} else {
- head->SetRoot(elm);
+ head.SetRoot(elm);
}
+
RB_SET_PARENT(RB_LEFT(old), elm);
+
if (RB_RIGHT(old)) {
RB_SET_PARENT(RB_RIGHT(old), elm);
}
+
if (parent) {
left = parent;
}
- return finalize();
+ if (color == RBColor::RB_BLACK) {
+ RB_REMOVE_COLOR(head, parent, child);
+ }
+
+ return old;
}
parent = RB_PARENT(elm);
@@ -495,17 +462,69 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
RB_SET_RIGHT(parent, child);
}
} else {
- head->SetRoot(child);
+ head.SetRoot(child);
+ }
+
+ if (color == RBColor::RB_BLACK) {
+ RB_REMOVE_COLOR(head, parent, child);
+ }
+
+ return old;
+}
+
+template <typename T>
+requires HasRBEntry<T>
+constexpr void RB_INSERT_COLOR(RBHead<T>& head, T* elm) {
+ T *parent = nullptr, *tmp = nullptr;
+ while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
+ T* gparent = RB_PARENT(parent);
+ if (parent == RB_LEFT(gparent)) {
+ tmp = RB_RIGHT(gparent);
+ if (tmp && RB_IS_RED(tmp)) {
+ RB_SET_COLOR(tmp, RBColor::RB_BLACK);
+ RB_SET_BLACKRED(parent, gparent);
+ elm = gparent;
+ continue;
+ }
+
+ if (RB_RIGHT(parent) == elm) {
+ RB_ROTATE_LEFT(head, parent, tmp);
+ tmp = parent;
+ parent = elm;
+ elm = tmp;
+ }
+
+ RB_SET_BLACKRED(parent, gparent);
+ RB_ROTATE_RIGHT(head, gparent, tmp);
+ } else {
+ tmp = RB_LEFT(gparent);
+ if (tmp && RB_IS_RED(tmp)) {
+ RB_SET_COLOR(tmp, RBColor::RB_BLACK);
+ RB_SET_BLACKRED(parent, gparent);
+ elm = gparent;
+ continue;
+ }
+
+ if (RB_LEFT(parent) == elm) {
+ RB_ROTATE_RIGHT(head, parent, tmp);
+ tmp = parent;
+ parent = elm;
+ elm = tmp;
+ }
+
+ RB_SET_BLACKRED(parent, gparent);
+ RB_ROTATE_LEFT(head, gparent, tmp);
+ }
}
- return finalize();
+ RB_SET_COLOR(head.Root(), RBColor::RB_BLACK);
}
-// Inserts a node into the RB tree
-template <typename Node, typename CompareFunction>
-Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
- Node* parent = nullptr;
- Node* tmp = head->Root();
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_INSERT(RBHead<T>& head, T* elm, Compare cmp) {
+ T* parent = nullptr;
+ T* tmp = head.Root();
int comp = 0;
while (tmp) {
@@ -529,17 +548,17 @@ Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
RB_SET_RIGHT(parent, elm);
}
} else {
- head->SetRoot(elm);
+ head.SetRoot(elm);
}
RB_INSERT_COLOR(head, elm);
return nullptr;
}
-// Finds the node with the same key as elm
-template <typename Node, typename CompareFunction>
-Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
- Node* tmp = head->Root();
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND(RBHead<T>& head, T* elm, Compare cmp) {
+ T* tmp = head.Root();
while (tmp) {
const int comp = cmp(elm, tmp);
@@ -555,11 +574,11 @@ Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
return nullptr;
}
-// Finds the first node greater than or equal to the search key
-template <typename Node, typename CompareFunction>
-Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
- Node* tmp = head->Root();
- Node* res = nullptr;
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_NFIND(RBHead<T>& head, T* elm, Compare cmp) {
+ T* tmp = head.Root();
+ T* res = nullptr;
while (tmp) {
const int comp = cmp(elm, tmp);
@@ -576,13 +595,13 @@ Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
return res;
}
-// Finds the node with the same key as lelm
-template <typename Node, typename CompareFunction>
-Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
- Node* tmp = head->Root();
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+ T* tmp = head.Root();
while (tmp) {
- const int comp = lcmp(lelm, tmp);
+ const int comp = cmp(key, tmp);
if (comp < 0) {
tmp = RB_LEFT(tmp);
} else if (comp > 0) {
@@ -595,14 +614,14 @@ Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
return nullptr;
}
-// Finds the first node greater than or equal to the search key
-template <typename Node, typename CompareFunction>
-Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) {
- Node* tmp = head->Root();
- Node* res = nullptr;
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_NFIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+ T* tmp = head.Root();
+ T* res = nullptr;
while (tmp) {
- const int comp = lcmp(lelm, tmp);
+ const int comp = cmp(key, tmp);
if (comp < 0) {
res = tmp;
tmp = RB_LEFT(tmp);
@@ -616,8 +635,43 @@ Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
return res;
}
-template <typename Node>
-Node* RB_NEXT(Node* elm) {
+template <typename T, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_EXISTING(RBHead<T>& head, T* elm, Compare cmp) {
+ T* tmp = head.Root();
+
+ while (true) {
+ const int comp = cmp(elm, tmp);
+ if (comp < 0) {
+ tmp = RB_LEFT(tmp);
+ } else if (comp > 0) {
+ tmp = RB_RIGHT(tmp);
+ } else {
+ return tmp;
+ }
+ }
+}
+
+template <typename T, typename U, typename Compare>
+requires HasRBEntry<T>
+constexpr T* RB_FIND_EXISTING_KEY(RBHead<T>& head, const U& key, Compare cmp) {
+ T* tmp = head.Root();
+
+ while (true) {
+ const int comp = cmp(key, tmp);
+ if (comp < 0) {
+ tmp = RB_LEFT(tmp);
+ } else if (comp > 0) {
+ tmp = RB_RIGHT(tmp);
+ } else {
+ return tmp;
+ }
+ }
+}
+
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_NEXT(T* elm) {
if (RB_RIGHT(elm)) {
elm = RB_RIGHT(elm);
while (RB_LEFT(elm)) {
@@ -636,8 +690,9 @@ Node* RB_NEXT(Node* elm) {
return elm;
}
-template <typename Node>
-Node* RB_PREV(Node* elm) {
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_PREV(T* elm) {
if (RB_LEFT(elm)) {
elm = RB_LEFT(elm);
while (RB_RIGHT(elm)) {
@@ -656,30 +711,32 @@ Node* RB_PREV(Node* elm) {
return elm;
}
-template <typename Node>
-Node* RB_MINMAX(RBHead<Node>* head, bool is_min) {
- Node* tmp = head->Root();
- Node* parent = nullptr;
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_MIN(RBHead<T>& head) {
+ T* tmp = head.Root();
+ T* parent = nullptr;
while (tmp) {
parent = tmp;
- if (is_min) {
- tmp = RB_LEFT(tmp);
- } else {
- tmp = RB_RIGHT(tmp);
- }
+ tmp = RB_LEFT(tmp);
}
return parent;
}
-template <typename Node>
-Node* RB_MIN(RBHead<Node>* head) {
- return RB_MINMAX(head, true);
-}
+template <typename T>
+requires HasRBEntry<T>
+constexpr T* RB_MAX(RBHead<T>& head) {
+ T* tmp = head.Root();
+ T* parent = nullptr;
-template <typename Node>
-Node* RB_MAX(RBHead<Node>* head) {
- return RB_MINMAX(head, false);
+ while (tmp) {
+ parent = tmp;
+ tmp = RB_RIGHT(tmp);
+ }
+
+ return parent;
}
-} // namespace Common
+
+} // namespace Common::freebsd
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index fbeacc7e2..d81edb140 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -1,8 +1,12 @@
-// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
+// Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
+#include <array>
#include <cstring>
+#include <iterator>
+#include <span>
+#include <string_view>
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
@@ -17,7 +21,7 @@
// clang-format on
#endif
-static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
+static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
#if defined(__DragonFly__) || defined(__FreeBSD__)
// Despite the name, this is just do_cpuid() with ECX as second input.
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
@@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#endif
}
-static inline void __cpuid(int info[4], int function_id) {
+static inline void __cpuid(int info[4], u32 function_id) {
return __cpuidex(info, function_id, 0);
}
@@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) {
namespace Common {
+CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
+ if (brand_string == "GenuineIntel") {
+ return Manufacturer::Intel;
+ } else if (brand_string == "AuthenticAMD") {
+ return Manufacturer::AMD;
+ } else if (brand_string == "HygonGenuine") {
+ return Manufacturer::Hygon;
+ }
+ return Manufacturer::Unknown;
+}
+
// Detects the various CPU features
static CPUCaps Detect() {
CPUCaps caps = {};
@@ -53,75 +68,74 @@ static CPUCaps Detect() {
// yuzu at all anyway
int cpu_id[4];
- memset(caps.brand_string, 0, sizeof(caps.brand_string));
- // Detect CPU's CPUID capabilities and grab CPU string
+ // Detect CPU's CPUID capabilities and grab manufacturer string
__cpuid(cpu_id, 0x00000000);
- u32 max_std_fn = cpu_id[0]; // EAX
-
- std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
- std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
- std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
- if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
- caps.manufacturer = Manufacturer::Intel;
- else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
- caps.manufacturer = Manufacturer::AMD;
- else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
- caps.manufacturer = Manufacturer::Hygon;
- else
- caps.manufacturer = Manufacturer::Unknown;
+ const u32 max_std_fn = cpu_id[0]; // EAX
- __cpuid(cpu_id, 0x80000000);
+ std::memset(caps.brand_string, 0, std::size(caps.brand_string));
+ std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32));
+ std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32));
+ std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32));
+
+ caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string);
- u32 max_ex_fn = cpu_id[0];
+ // Set reasonable default cpu string even if brand string not available
+ std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string));
- // Set reasonable default brand string even if brand string not available
- strcpy(caps.cpu_string, caps.brand_string);
+ __cpuid(cpu_id, 0x80000000);
+
+ const u32 max_ex_fn = cpu_id[0];
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
- if ((cpu_id[3] >> 25) & 1)
- caps.sse = true;
- if ((cpu_id[3] >> 26) & 1)
- caps.sse2 = true;
- if ((cpu_id[2]) & 1)
- caps.sse3 = true;
- if ((cpu_id[2] >> 9) & 1)
- caps.ssse3 = true;
- if ((cpu_id[2] >> 19) & 1)
- caps.sse4_1 = true;
- if ((cpu_id[2] >> 20) & 1)
- caps.sse4_2 = true;
- if ((cpu_id[2] >> 25) & 1)
- caps.aes = true;
+ caps.sse = Common::Bit<25>(cpu_id[3]);
+ caps.sse2 = Common::Bit<26>(cpu_id[3]);
+ caps.sse3 = Common::Bit<0>(cpu_id[2]);
+ caps.pclmulqdq = Common::Bit<1>(cpu_id[2]);
+ caps.ssse3 = Common::Bit<9>(cpu_id[2]);
+ caps.sse4_1 = Common::Bit<19>(cpu_id[2]);
+ caps.sse4_2 = Common::Bit<20>(cpu_id[2]);
+ caps.movbe = Common::Bit<22>(cpu_id[2]);
+ caps.popcnt = Common::Bit<23>(cpu_id[2]);
+ caps.aes = Common::Bit<25>(cpu_id[2]);
+ caps.f16c = Common::Bit<29>(cpu_id[2]);
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
- if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
+ if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) {
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
caps.avx = true;
- if ((cpu_id[2] >> 12) & 1)
+ if (Common::Bit<12>(cpu_id[2]))
caps.fma = true;
}
}
if (max_std_fn >= 7) {
__cpuidex(cpu_id, 0x00000007, 0x00000000);
- // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
- if ((cpu_id[1] >> 5) & 1)
- caps.avx2 = caps.avx;
- if ((cpu_id[1] >> 3) & 1)
- caps.bmi1 = true;
- if ((cpu_id[1] >> 8) & 1)
- caps.bmi2 = true;
- // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
- if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
- (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
- caps.avx512 = caps.avx2;
+ // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed
+ if (caps.avx) {
+ caps.avx2 = Common::Bit<5>(cpu_id[1]);
+ caps.avx512f = Common::Bit<16>(cpu_id[1]);
+ caps.avx512dq = Common::Bit<17>(cpu_id[1]);
+ caps.avx512cd = Common::Bit<28>(cpu_id[1]);
+ caps.avx512bw = Common::Bit<30>(cpu_id[1]);
+ caps.avx512vl = Common::Bit<31>(cpu_id[1]);
+ caps.avx512vbmi = Common::Bit<1>(cpu_id[2]);
+ caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);
}
+
+ caps.bmi1 = Common::Bit<3>(cpu_id[1]);
+ caps.bmi2 = Common::Bit<8>(cpu_id[1]);
+ caps.sha = Common::Bit<29>(cpu_id[1]);
+
+ caps.gfni = Common::Bit<8>(cpu_id[2]);
+
+ __cpuidex(cpu_id, 0x00000007, 0x00000001);
+ caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);
}
}
@@ -138,15 +152,13 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
- if ((cpu_id[2] >> 16) & 1)
- caps.fma4 = true;
+ caps.lzcnt = Common::Bit<5>(cpu_id[2]);
+ caps.fma4 = Common::Bit<16>(cpu_id[2]);
}
if (max_ex_fn >= 0x80000007) {
__cpuid(cpu_id, 0x80000007);
- if (cpu_id[3] & (1 << 8)) {
- caps.invariant_tsc = true;
- }
+ caps.invariant_tsc = Common::Bit<8>(cpu_id[3]);
}
if (max_std_fn >= 0x16) {
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index e3b63302e..40c48b132 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -1,42 +1,65 @@
-// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
+// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
+// Project Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
#pragma once
-namespace Common {
+#include <string_view>
+#include "common/common_types.h"
-enum class Manufacturer : u32 {
- Intel = 0,
- AMD = 1,
- Hygon = 2,
- Unknown = 3,
-};
+namespace Common {
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
+
+ enum class Manufacturer : u8 {
+ Unknown = 0,
+ Intel = 1,
+ AMD = 2,
+ Hygon = 3,
+ };
+
+ static Manufacturer ParseManufacturer(std::string_view brand_string);
+
Manufacturer manufacturer;
- char cpu_string[0x21];
- char brand_string[0x41];
- bool sse;
- bool sse2;
- bool sse3;
- bool ssse3;
- bool sse4_1;
- bool sse4_2;
- bool lzcnt;
- bool avx;
- bool avx2;
- bool avx512;
- bool bmi1;
- bool bmi2;
- bool fma;
- bool fma4;
- bool aes;
- bool invariant_tsc;
+ char brand_string[13];
+
+ char cpu_string[48];
+
u32 base_frequency;
u32 max_frequency;
u32 bus_frequency;
+
+ bool sse : 1;
+ bool sse2 : 1;
+ bool sse3 : 1;
+ bool ssse3 : 1;
+ bool sse4_1 : 1;
+ bool sse4_2 : 1;
+
+ bool avx : 1;
+ bool avx_vnni : 1;
+ bool avx2 : 1;
+ bool avx512f : 1;
+ bool avx512dq : 1;
+ bool avx512cd : 1;
+ bool avx512bw : 1;
+ bool avx512vl : 1;
+ bool avx512vbmi : 1;
+ bool avx512bitalg : 1;
+
+ bool aes : 1;
+ bool bmi1 : 1;
+ bool bmi2 : 1;
+ bool f16c : 1;
+ bool fma : 1;
+ bool fma4 : 1;
+ bool gfni : 1;
+ bool invariant_tsc : 1;
+ bool lzcnt : 1;
+ bool movbe : 1;
+ bool pclmulqdq : 1;
+ bool popcnt : 1;
+ bool sha : 1;
};
/**
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 7ed43bfb1..1f234c822 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -154,6 +154,7 @@ add_library(core STATIC
hle/api_version.h
hle/ipc.h
hle/ipc_helpers.h
+ hle/kernel/board/nintendo/nx/k_memory_layout.h
hle/kernel/board/nintendo/nx/k_system_control.cpp
hle/kernel/board/nintendo/nx/k_system_control.h
hle/kernel/board/nintendo/nx/secure_monitor.h
@@ -166,6 +167,7 @@ add_library(core STATIC
hle/kernel/hle_ipc.h
hle/kernel/init/init_slab_setup.cpp
hle/kernel/init/init_slab_setup.h
+ hle/kernel/initial_process.h
hle/kernel/k_address_arbiter.cpp
hle/kernel/k_address_arbiter.h
hle/kernel/k_address_space_info.cpp
@@ -207,6 +209,7 @@ add_library(core STATIC
hle/kernel/k_memory_region.h
hle/kernel/k_memory_region_type.h
hle/kernel/k_page_bitmap.h
+ hle/kernel/k_page_buffer.h
hle/kernel/k_page_heap.cpp
hle/kernel/k_page_heap.h
hle/kernel/k_page_linked_list.h
@@ -244,6 +247,8 @@ add_library(core STATIC
hle/kernel/k_system_control.h
hle/kernel/k_thread.cpp
hle/kernel/k_thread.h
+ hle/kernel/k_thread_local_page.cpp
+ hle/kernel/k_thread_local_page.h
hle/kernel/k_thread_queue.cpp
hle/kernel/k_thread_queue.h
hle/kernel/k_trace.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index b0d89c539..c1c843b8f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
+ config.fastmem_exclusive_access = true;
+ config.recompile_on_exclusive_fastmem_failure = true;
// Multi-process state
config.processor_id = core_index;
@@ -146,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512_MiB;
- config.far_code_offset = 400_MiB;
+ config.code_cache_size = 128_MiB;
+ config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {
@@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
+ if (!Settings::values.cpuopt_fastmem_exclusives) {
+ config.fastmem_exclusive_access = false;
+ }
+ if (!Settings::values.cpuopt_recompile_exclusives) {
+ config.recompile_on_exclusive_fastmem_failure = false;
+ }
}
// Unsafe optimizations
@@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
+ if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
+ }
}
// Curated optimizations
@@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_unique<Dynarmic::A32::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 56836bd05..aa74fce4d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.fastmem_pointer = page_table->fastmem_arena;
config.fastmem_address_space_bits = address_space_bits;
config.silently_mirror_fastmem = false;
+
+ config.fastmem_exclusive_access = true;
+ config.recompile_on_exclusive_fastmem_failure = true;
}
// Multi-process state
@@ -205,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512_MiB;
- config.far_code_offset = 400_MiB;
+ config.code_cache_size = 128_MiB;
+ config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {
@@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
+ if (!Settings::values.cpuopt_fastmem_exclusives) {
+ config.fastmem_exclusive_access = false;
+ }
+ if (!Settings::values.cpuopt_recompile_exclusives) {
+ config.recompile_on_exclusive_fastmem_failure = false;
+ }
}
// Unsafe optimizations
@@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
+ if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
+ }
}
// Curated optimizations
@@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.fastmem_address_space_bits = 64;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
index 397d054a8..ea6b224e0 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
});
}
-void DynarmicExclusiveMonitor::ClearExclusive() {
- monitor.Clear();
+void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
+ monitor.ClearProcessor(core_index);
}
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
index 265c4ecef..5a15b43ef 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.h
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -29,7 +29,7 @@ public:
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
- void ClearExclusive() override;
+ void ClearExclusive(std::size_t core_index) override;
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 62f6e6023..9914ca3da 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -23,7 +23,7 @@ public:
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
- virtual void ClearExclusive() = 0;
+ virtual void ClearExclusive(std::size_t core_index) = 0;
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b0cfee3ee..c60a784c3 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -326,7 +326,9 @@ struct System::Impl {
is_powered_on = false;
exit_lock = false;
- gpu_core->NotifyShutdown();
+ if (gpu_core != nullptr) {
+ gpu_core->NotifyShutdown();
+ }
services.reset();
service_manager.reset();
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index e413a520a..b3bffecb2 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -42,11 +42,20 @@ public:
context.MakeCurrent();
}
~Scoped() {
- context.DoneCurrent();
+ if (active) {
+ context.DoneCurrent();
+ }
+ }
+
+ /// In the event that context was destroyed before the Scoped is destroyed, this provides a
+ /// mechanism to prevent calling a destroyed object's method during the deconstructor
+ void Cancel() {
+ active = false;
}
private:
GraphicsContext& context;
+ bool active{true};
};
/// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 026257115..3c4e45fcd 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -385,7 +385,7 @@ public:
T PopRaw();
template <class T>
- std::shared_ptr<T> PopIpcInterface() {
+ std::weak_ptr<T> PopIpcInterface() {
ASSERT(context->Session()->IsDomain());
ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
return context->GetDomainHandler<T>(Pop<u32>() - 1);
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
new file mode 100644
index 000000000..01e225088
--- /dev/null
+++ b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
@@ -0,0 +1,13 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+constexpr inline PAddr MainMemoryAddress = 0x80000000;
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 702cacffc..8027bec00 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -39,6 +39,10 @@ Smc::MemoryArrangement GetMemoryArrangeForInit() {
}
} // namespace
+size_t KSystemControl::Init::GetRealMemorySize() {
+ return GetIntendedMemorySize();
+}
+
// Initialization.
size_t KSystemControl::Init::GetIntendedMemorySize() {
switch (GetMemorySizeForInit()) {
@@ -53,7 +57,13 @@ size_t KSystemControl::Init::GetIntendedMemorySize() {
}
PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) {
- return base_address;
+ const size_t real_dram_size = KSystemControl::Init::GetRealMemorySize();
+ const size_t intended_dram_size = KSystemControl::Init::GetIntendedMemorySize();
+ if (intended_dram_size * 2 < real_dram_size) {
+ return base_address;
+ } else {
+ return base_address + ((real_dram_size - intended_dram_size) / 2);
+ }
}
bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() {
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
index 52f230ced..df2a17f2a 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
@@ -13,6 +13,7 @@ public:
class Init {
public:
// Initialization.
+ static std::size_t GetRealMemorySize();
static std::size_t GetIntendedMemorySize();
static PAddr GetKernelPhysicalBaseAddress(u64 base_address);
static bool ShouldIncreaseThreadResourceLimit();
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index e19544c54..9f2175f82 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -45,7 +45,7 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
LOG_CRITICAL(IPC, "object_id {} is too big!", object_id);
return false;
}
- return DomainHandler(object_id - 1) != nullptr;
+ return DomainHandler(object_id - 1).lock() != nullptr;
} else {
return session_handler != nullptr;
}
@@ -53,9 +53,6 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
void SessionRequestHandler::ClientConnected(KServerSession* session) {
session->ClientConnected(shared_from_this());
-
- // Ensure our server session is tracked globally.
- kernel.RegisterServerSession(session);
}
void SessionRequestHandler::ClientDisconnected(KServerSession* session) {
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index 754b41ff6..670cc741c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -94,6 +94,7 @@ protected:
std::weak_ptr<ServiceThread> service_thread;
};
+using SessionRequestHandlerWeakPtr = std::weak_ptr<SessionRequestHandler>;
using SessionRequestHandlerPtr = std::shared_ptr<SessionRequestHandler>;
/**
@@ -139,7 +140,7 @@ public:
}
}
- SessionRequestHandlerPtr DomainHandler(std::size_t index) const {
+ SessionRequestHandlerWeakPtr DomainHandler(std::size_t index) const {
ASSERT_MSG(index < DomainHandlerCount(), "Unexpected handler index {}", index);
return domain_handlers.at(index);
}
@@ -328,10 +329,10 @@ public:
template <typename T>
std::shared_ptr<T> GetDomainHandler(std::size_t index) const {
- return std::static_pointer_cast<T>(manager->DomainHandler(index));
+ return std::static_pointer_cast<T>(manager.lock()->DomainHandler(index).lock());
}
- void SetSessionRequestManager(std::shared_ptr<SessionRequestManager> manager_) {
+ void SetSessionRequestManager(std::weak_ptr<SessionRequestManager> manager_) {
manager = std::move(manager_);
}
@@ -374,7 +375,7 @@ private:
u32 handles_offset{};
u32 domain_offset{};
- std::shared_ptr<SessionRequestManager> manager;
+ std::weak_ptr<SessionRequestManager> manager;
KernelCore& kernel;
Core::Memory::Memory& memory;
diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp
index 36fc0944a..b0f773ee0 100644
--- a/src/core/hle/kernel/init/init_slab_setup.cpp
+++ b/src/core/hle/kernel/init/init_slab_setup.cpp
@@ -7,19 +7,23 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/core.h"
+#include "core/device_memory.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/init/init_slab_setup.h"
#include "core/hle/kernel/k_code_memory.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_memory_manager.h"
+#include "core/hle/kernel/k_page_buffer.h"
#include "core/hle/kernel/k_port.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_session.h"
#include "core/hle/kernel/k_shared_memory.h"
+#include "core/hle/kernel/k_shared_memory_info.h"
#include "core/hle/kernel/k_system_control.h"
#include "core/hle/kernel/k_thread.h"
+#include "core/hle/kernel/k_thread_local_page.h"
#include "core/hle/kernel/k_transfer_memory.h"
namespace Kernel::Init {
@@ -32,9 +36,13 @@ namespace Kernel::Init {
HANDLER(KEvent, (SLAB_COUNT(KEvent)), ##__VA_ARGS__) \
HANDLER(KPort, (SLAB_COUNT(KPort)), ##__VA_ARGS__) \
HANDLER(KSharedMemory, (SLAB_COUNT(KSharedMemory)), ##__VA_ARGS__) \
+ HANDLER(KSharedMemoryInfo, (SLAB_COUNT(KSharedMemory) * 8), ##__VA_ARGS__) \
HANDLER(KTransferMemory, (SLAB_COUNT(KTransferMemory)), ##__VA_ARGS__) \
HANDLER(KCodeMemory, (SLAB_COUNT(KCodeMemory)), ##__VA_ARGS__) \
HANDLER(KSession, (SLAB_COUNT(KSession)), ##__VA_ARGS__) \
+ HANDLER(KThreadLocalPage, \
+ (SLAB_COUNT(KProcess) + (SLAB_COUNT(KProcess) + SLAB_COUNT(KThread)) / 8), \
+ ##__VA_ARGS__) \
HANDLER(KResourceLimit, (SLAB_COUNT(KResourceLimit)), ##__VA_ARGS__)
namespace {
@@ -50,38 +58,46 @@ enum KSlabType : u32 {
// Constexpr counts.
constexpr size_t SlabCountKProcess = 80;
constexpr size_t SlabCountKThread = 800;
-constexpr size_t SlabCountKEvent = 700;
+constexpr size_t SlabCountKEvent = 900;
constexpr size_t SlabCountKInterruptEvent = 100;
-constexpr size_t SlabCountKPort = 256 + 0x20; // Extra 0x20 ports over Nintendo for homebrew.
+constexpr size_t SlabCountKPort = 384;
constexpr size_t SlabCountKSharedMemory = 80;
constexpr size_t SlabCountKTransferMemory = 200;
constexpr size_t SlabCountKCodeMemory = 10;
constexpr size_t SlabCountKDeviceAddressSpace = 300;
-constexpr size_t SlabCountKSession = 933;
+constexpr size_t SlabCountKSession = 1133;
constexpr size_t SlabCountKLightSession = 100;
constexpr size_t SlabCountKObjectName = 7;
constexpr size_t SlabCountKResourceLimit = 5;
constexpr size_t SlabCountKDebug = Core::Hardware::NUM_CPU_CORES;
-constexpr size_t SlabCountKAlpha = 1;
-constexpr size_t SlabCountKBeta = 6;
+constexpr size_t SlabCountKIoPool = 1;
+constexpr size_t SlabCountKIoRegion = 6;
constexpr size_t SlabCountExtraKThread = 160;
+/// Helper function to translate from the slab virtual address to the reserved location in physical
+/// memory.
+static PAddr TranslateSlabAddrToPhysical(KMemoryLayout& memory_layout, VAddr slab_addr) {
+ slab_addr -= memory_layout.GetSlabRegionAddress();
+ return slab_addr + Core::DramMemoryMap::SlabHeapBase;
+}
+
template <typename T>
VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAddr address,
size_t num_objects) {
- // TODO(bunnei): This is just a place holder. We should initialize the appropriate KSlabHeap for
- // kernel object type T with the backing kernel memory pointer once we emulate kernel memory.
const size_t size = Common::AlignUp(sizeof(T) * num_objects, alignof(void*));
VAddr start = Common::AlignUp(address, alignof(T));
- // This is intentionally empty. Once KSlabHeap is fully implemented, we can replace this with
- // the pointer to emulated memory to pass along. Until then, KSlabHeap will just allocate/free
- // host memory.
- void* backing_kernel_memory{};
+ // This should use the virtual memory address passed in, but currently, we do not setup the
+ // kernel virtual memory layout. Instead, we simply map these at a region of physical memory
+ // that we reserve for the slab heaps.
+ // TODO(bunnei): Fix this once we support the kernel virtual memory layout.
if (size > 0) {
+ void* backing_kernel_memory{
+ system.DeviceMemory().GetPointer(TranslateSlabAddrToPhysical(memory_layout, start))};
+
const KMemoryRegion* region = memory_layout.FindVirtual(start + size - 1);
ASSERT(region != nullptr);
ASSERT(region->IsDerivedFrom(KMemoryRegionType_KernelSlab));
@@ -91,6 +107,12 @@ VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAd
return start + size;
}
+size_t CalculateSlabHeapGapSize() {
+ constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB;
+ static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);
+ return KernelSlabHeapGapSize;
+}
+
} // namespace
KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
@@ -109,8 +131,8 @@ KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
.num_KObjectName = SlabCountKObjectName,
.num_KResourceLimit = SlabCountKResourceLimit,
.num_KDebug = SlabCountKDebug,
- .num_KAlpha = SlabCountKAlpha,
- .num_KBeta = SlabCountKBeta,
+ .num_KIoPool = SlabCountKIoPool,
+ .num_KIoRegion = SlabCountKIoRegion,
};
}
@@ -136,11 +158,34 @@ size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) {
#undef ADD_SLAB_SIZE
// Add the reserved size.
- size += KernelSlabHeapGapsSize;
+ size += CalculateSlabHeapGapSize();
return size;
}
+void InitializeKPageBufferSlabHeap(Core::System& system) {
+ auto& kernel = system.Kernel();
+
+ const auto& counts = kernel.SlabResourceCounts();
+ const size_t num_pages =
+ counts.num_KProcess + counts.num_KThread + (counts.num_KProcess + counts.num_KThread) / 8;
+ const size_t slab_size = num_pages * PageSize;
+
+ // Reserve memory from the system resource limit.
+ ASSERT(kernel.GetSystemResourceLimit()->Reserve(LimitableResource::PhysicalMemory, slab_size));
+
+ // Allocate memory for the slab.
+ constexpr auto AllocateOption = KMemoryManager::EncodeOption(
+ KMemoryManager::Pool::System, KMemoryManager::Direction::FromFront);
+ const PAddr slab_address =
+ kernel.MemoryManager().AllocateAndOpenContinuous(num_pages, 1, AllocateOption);
+ ASSERT(slab_address != 0);
+
+ // Initialize the slabheap.
+ KPageBuffer::InitializeSlabHeap(kernel, system.DeviceMemory().GetPointer(slab_address),
+ slab_size);
+}
+
void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
auto& kernel = system.Kernel();
@@ -160,13 +205,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
}
// Create an array to represent the gaps between the slabs.
- const size_t total_gap_size = KernelSlabHeapGapsSize;
+ const size_t total_gap_size = CalculateSlabHeapGapSize();
std::array<size_t, slab_types.size()> slab_gaps;
- for (size_t i = 0; i < slab_gaps.size(); i++) {
+ for (auto& slab_gap : slab_gaps) {
// Note: This is an off-by-one error from Nintendo's intention, because GenerateRandomRange
// is inclusive. However, Nintendo also has the off-by-one error, and it's "harmless", so we
// will include it ourselves.
- slab_gaps[i] = KSystemControl::GenerateRandomRange(0, total_gap_size);
+ slab_gap = KSystemControl::GenerateRandomRange(0, total_gap_size);
}
// Sort the array, so that we can treat differences between values as offsets to the starts of
@@ -177,13 +222,21 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
}
}
- for (size_t i = 0; i < slab_types.size(); i++) {
+ // Track the gaps, so that we can free them to the unused slab tree.
+ VAddr gap_start = address;
+ size_t gap_size = 0;
+
+ for (size_t i = 0; i < slab_gaps.size(); i++) {
// Add the random gap to the address.
- address += (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
+ const auto cur_gap = (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
+ address += cur_gap;
+ gap_size += cur_gap;
#define INITIALIZE_SLAB_HEAP(NAME, COUNT, ...) \
case KSlabType_##NAME: \
- address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \
+ if (COUNT > 0) { \
+ address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \
+ } \
break;
// Initialize the slabheap.
@@ -192,7 +245,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
FOREACH_SLAB_TYPE(INITIALIZE_SLAB_HEAP)
// If we somehow get an invalid type, abort.
default:
- UNREACHABLE();
+ UNREACHABLE_MSG("Unknown slab type: {}", slab_types[i]);
+ }
+
+ // If we've hit the end of a gap, free it.
+ if (gap_start + gap_size != address) {
+ gap_start = address;
+ gap_size = 0;
}
}
}
diff --git a/src/core/hle/kernel/init/init_slab_setup.h b/src/core/hle/kernel/init/init_slab_setup.h
index a8f7e0918..f54b67d02 100644
--- a/src/core/hle/kernel/init/init_slab_setup.h
+++ b/src/core/hle/kernel/init/init_slab_setup.h
@@ -32,12 +32,13 @@ struct KSlabResourceCounts {
size_t num_KObjectName;
size_t num_KResourceLimit;
size_t num_KDebug;
- size_t num_KAlpha;
- size_t num_KBeta;
+ size_t num_KIoPool;
+ size_t num_KIoRegion;
};
void InitializeSlabResourceCounts(KernelCore& kernel);
size_t CalculateTotalSlabHeapSize(const KernelCore& kernel);
+void InitializeKPageBufferSlabHeap(Core::System& system);
void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout);
} // namespace Kernel::Init
diff --git a/src/core/hle/kernel/initial_process.h b/src/core/hle/kernel/initial_process.h
new file mode 100644
index 000000000..25b27909c
--- /dev/null
+++ b/src/core/hle/kernel/initial_process.h
@@ -0,0 +1,23 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/literals.h"
+#include "core/hle/kernel/board/nintendo/nx/k_memory_layout.h"
+#include "core/hle/kernel/board/nintendo/nx/k_system_control.h"
+
+namespace Kernel {
+
+using namespace Common::Literals;
+
+constexpr std::size_t InitialProcessBinarySizeMax = 12_MiB;
+
+static inline PAddr GetInitialProcessBinaryPhysicalAddress() {
+ return Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetKernelPhysicalBaseAddress(
+ MainMemoryAddress);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index 783c69858..8cdd0490f 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
}
} else {
// Otherwise, clear our exclusive hold and finish
- monitor.ClearExclusive();
+ monitor.ClearExclusive(current_core);
}
// We're done.
@@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
}
} else {
// Otherwise, clear our exclusive hold and finish.
- monitor.ClearExclusive();
+ monitor.ClearExclusive(current_core);
}
// We're done.
@@ -115,7 +115,7 @@ ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) {
{
KScopedSchedulerLock sl(kernel);
- auto it = thread_tree.nfind_light({addr, -1});
+ auto it = thread_tree.nfind_key({addr, -1});
while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
(it->GetAddressArbiterKey() == addr)) {
// End the thread's wait.
@@ -148,7 +148,7 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
return ResultInvalidState;
}
- auto it = thread_tree.nfind_light({addr, -1});
+ auto it = thread_tree.nfind_key({addr, -1});
while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
(it->GetAddressArbiterKey() == addr)) {
// End the thread's wait.
@@ -171,7 +171,7 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
{
[[maybe_unused]] const KScopedSchedulerLock sl(kernel);
- auto it = thread_tree.nfind_light({addr, -1});
+ auto it = thread_tree.nfind_key({addr, -1});
// Determine the updated value.
s32 new_value{};
if (count <= 0) {
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index aadcc297a..8e2a9593c 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -244,7 +244,7 @@ void KConditionVariable::Signal(u64 cv_key, s32 count) {
{
KScopedSchedulerLock sl(kernel);
- auto it = thread_tree.nfind_light({cv_key, -1});
+ auto it = thread_tree.nfind_key({cv_key, -1});
while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
(it->GetConditionVariableKey() == cv_key)) {
KThread* target_thread = std::addressof(*it);
diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h
index 57ff538cc..0858827b6 100644
--- a/src/core/hle/kernel/k_memory_layout.h
+++ b/src/core/hle/kernel/k_memory_layout.h
@@ -57,11 +57,11 @@ constexpr std::size_t KernelPageTableHeapSize = GetMaximumOverheadSize(MainMemor
constexpr std::size_t KernelInitialPageHeapSize = 128_KiB;
constexpr std::size_t KernelSlabHeapDataSize = 5_MiB;
-constexpr std::size_t KernelSlabHeapGapsSize = 2_MiB - 64_KiB;
-constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSize;
+constexpr std::size_t KernelSlabHeapGapsSizeMax = 2_MiB - 64_KiB;
+constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSizeMax;
// NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860.
-constexpr std::size_t KernelSlabHeapAdditionalSize = 416_KiB;
+constexpr std::size_t KernelSlabHeapAdditionalSize = 0x68000;
constexpr std::size_t KernelResourceSize =
KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize;
@@ -173,6 +173,10 @@ public:
return Dereference(FindVirtualLinear(address));
}
+ const KMemoryRegion& GetPhysicalLinearRegion(PAddr address) const {
+ return Dereference(FindPhysicalLinear(address));
+ }
+
const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const {
return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer);
}
diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp
index 1b44541b1..a2f18f643 100644
--- a/src/core/hle/kernel/k_memory_manager.cpp
+++ b/src/core/hle/kernel/k_memory_manager.cpp
@@ -10,189 +10,412 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/device_memory.h"
+#include "core/hle/kernel/initial_process.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_page_linked_list.h"
+#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/svc_results.h"
+#include "core/memory.h"
namespace Kernel {
-KMemoryManager::KMemoryManager(Core::System& system_) : system{system_} {}
+namespace {
+
+constexpr KMemoryManager::Pool GetPoolFromMemoryRegionType(u32 type) {
+ if ((type | KMemoryRegionType_DramApplicationPool) == type) {
+ return KMemoryManager::Pool::Application;
+ } else if ((type | KMemoryRegionType_DramAppletPool) == type) {
+ return KMemoryManager::Pool::Applet;
+ } else if ((type | KMemoryRegionType_DramSystemPool) == type) {
+ return KMemoryManager::Pool::System;
+ } else if ((type | KMemoryRegionType_DramSystemNonSecurePool) == type) {
+ return KMemoryManager::Pool::SystemNonSecure;
+ } else {
+ UNREACHABLE_MSG("InvalidMemoryRegionType for conversion to Pool");
+ return {};
+ }
+}
-std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) {
- const auto size{end_address - start_address};
+} // namespace
+
+KMemoryManager::KMemoryManager(Core::System& system_)
+ : system{system_}, pool_locks{
+ KLightLock{system_.Kernel()},
+ KLightLock{system_.Kernel()},
+ KLightLock{system_.Kernel()},
+ KLightLock{system_.Kernel()},
+ } {}
+
+void KMemoryManager::Initialize(VAddr management_region, size_t management_region_size) {
+
+ // Clear the management region to zero.
+ const VAddr management_region_end = management_region + management_region_size;
+
+ // Reset our manager count.
+ num_managers = 0;
+
+ // Traverse the virtual memory layout tree, initializing each manager as appropriate.
+ while (num_managers != MaxManagerCount) {
+ // Locate the region that should initialize the current manager.
+ PAddr region_address = 0;
+ size_t region_size = 0;
+ Pool region_pool = Pool::Count;
+ for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
+ // We only care about regions that we need to create managers for.
+ if (!it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
+ continue;
+ }
- // Calculate metadata sizes
- const auto ref_count_size{(size / PageSize) * sizeof(u16)};
- const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)};
- const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)};
- const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)};
- const auto total_metadata_size{manager_size + page_heap_size};
- ASSERT(manager_size <= total_metadata_size);
- ASSERT(Common::IsAligned(total_metadata_size, PageSize));
+ // We want to initialize the managers in order.
+ if (it.GetAttributes() != num_managers) {
+ continue;
+ }
- // Setup region
- pool = new_pool;
+ const PAddr cur_start = it.GetAddress();
+ const PAddr cur_end = it.GetEndAddress();
+
+ // Validate the region.
+ ASSERT(cur_end != 0);
+ ASSERT(cur_start != 0);
+ ASSERT(it.GetSize() > 0);
+
+ // Update the region's extents.
+ if (region_address == 0) {
+ region_address = cur_start;
+ region_size = it.GetSize();
+ region_pool = GetPoolFromMemoryRegionType(it.GetType());
+ } else {
+ ASSERT(cur_start == region_address + region_size);
+
+ // Update the size.
+ region_size = cur_end - region_address;
+ ASSERT(GetPoolFromMemoryRegionType(it.GetType()) == region_pool);
+ }
+ }
+
+ // If we didn't find a region, we're done.
+ if (region_size == 0) {
+ break;
+ }
- // Initialize the manager's KPageHeap
- heap.Initialize(start_address, size, page_heap_size);
+ // Initialize a new manager for the region.
+ Impl* manager = std::addressof(managers[num_managers++]);
+ ASSERT(num_managers <= managers.size());
+
+ const size_t cur_size = manager->Initialize(region_address, region_size, management_region,
+ management_region_end, region_pool);
+ management_region += cur_size;
+ ASSERT(management_region <= management_region_end);
+
+ // Insert the manager into the pool list.
+ const auto region_pool_index = static_cast<u32>(region_pool);
+ if (pool_managers_tail[region_pool_index] == nullptr) {
+ pool_managers_head[region_pool_index] = manager;
+ } else {
+ pool_managers_tail[region_pool_index]->SetNext(manager);
+ manager->SetPrev(pool_managers_tail[region_pool_index]);
+ }
+ pool_managers_tail[region_pool_index] = manager;
+ }
- // Free the memory to the heap
- heap.Free(start_address, size / PageSize);
+ // Free each region to its corresponding heap.
+ size_t reserved_sizes[MaxManagerCount] = {};
+ const PAddr ini_start = GetInitialProcessBinaryPhysicalAddress();
+ const PAddr ini_end = ini_start + InitialProcessBinarySizeMax;
+ const PAddr ini_last = ini_end - 1;
+ for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
+ if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
+ // Get the manager for the region.
+ auto index = it.GetAttributes();
+ auto& manager = managers[index];
+
+ const PAddr cur_start = it.GetAddress();
+ const PAddr cur_last = it.GetLastAddress();
+ const PAddr cur_end = it.GetEndAddress();
+
+ if (cur_start <= ini_start && ini_last <= cur_last) {
+ // Free memory before the ini to the heap.
+ if (cur_start != ini_start) {
+ manager.Free(cur_start, (ini_start - cur_start) / PageSize);
+ }
- // Update the heap's used size
- heap.UpdateUsedSize();
+ // Open/reserve the ini memory.
+ manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize);
+ reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax;
- return total_metadata_size;
-}
+ // Free memory after the ini to the heap.
+ if (ini_last != cur_last) {
+ ASSERT(cur_end != 0);
+ manager.Free(ini_end, cur_end - ini_end);
+ }
+ } else {
+ // Ensure there's no partial overlap with the ini image.
+ if (cur_start <= ini_last) {
+ ASSERT(cur_last < ini_start);
+ } else {
+ // Otherwise, check the region for general validity.
+ ASSERT(cur_end != 0);
+ }
-void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) {
- ASSERT(pool < Pool::Count);
- managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address);
+ // Free the memory to the heap.
+ manager.Free(cur_start, it.GetSize() / PageSize);
+ }
+ }
+ }
+
+ // Update the used size for all managers.
+ for (size_t i = 0; i < num_managers; ++i) {
+ managers[i].SetInitialUsedHeapSize(reserved_sizes[i]);
+ }
}
-VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages,
- u32 option) {
- // Early return if we're allocating no pages
+PAddr KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option) {
+ // Early return if we're allocating no pages.
if (num_pages == 0) {
- return {};
+ return 0;
}
- // Lock the pool that we're allocating from
+ // Lock the pool that we're allocating from.
const auto [pool, dir] = DecodeOption(option);
- const auto pool_index{static_cast<std::size_t>(pool)};
- std::lock_guard lock{pool_locks[pool_index]};
-
- // Choose a heap based on our page size request
- const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)};
-
- // Loop, trying to iterate from each block
- // TODO (bunnei): Support multiple managers
- Impl& chosen_manager{managers[pool_index]};
- VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)};
+ KScopedLightLock lk(pool_locks[static_cast<std::size_t>(pool)]);
+
+ // Choose a heap based on our page size request.
+ const s32 heap_index = KPageHeap::GetAlignedBlockIndex(num_pages, align_pages);
+
+ // Loop, trying to iterate from each block.
+ Impl* chosen_manager = nullptr;
+ PAddr allocated_block = 0;
+ for (chosen_manager = this->GetFirstManager(pool, dir); chosen_manager != nullptr;
+ chosen_manager = this->GetNextManager(chosen_manager, dir)) {
+ allocated_block = chosen_manager->AllocateBlock(heap_index, true);
+ if (allocated_block != 0) {
+ break;
+ }
+ }
- // If we failed to allocate, quit now
- if (!allocated_block) {
- return {};
+ // If we failed to allocate, quit now.
+ if (allocated_block == 0) {
+ return 0;
}
- // If we allocated more than we need, free some
- const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)};
+ // If we allocated more than we need, free some.
+ const size_t allocated_pages = KPageHeap::GetBlockNumPages(heap_index);
if (allocated_pages > num_pages) {
- chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
+ chosen_manager->Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
}
+ // Open the first reference to the pages.
+ chosen_manager->OpenFirst(allocated_block, num_pages);
+
return allocated_block;
}
-ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
- Direction dir, u32 heap_fill_value) {
- ASSERT(page_list.GetNumPages() == 0);
+ResultCode KMemoryManager::AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
+ Direction dir, bool random) {
+ // Choose a heap based on our page size request.
+ const s32 heap_index = KPageHeap::GetBlockIndex(num_pages);
+ R_UNLESS(0 <= heap_index, ResultOutOfMemory);
+
+ // Ensure that we don't leave anything un-freed.
+ auto group_guard = SCOPE_GUARD({
+ for (const auto& it : out->Nodes()) {
+ auto& manager = this->GetManager(system.Kernel().MemoryLayout(), it.GetAddress());
+ const size_t num_pages_to_free =
+ std::min(it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
+ manager.Free(it.GetAddress(), num_pages_to_free);
+ }
+ });
- // Early return if we're allocating no pages
- if (num_pages == 0) {
- return ResultSuccess;
- }
+ // Keep allocating until we've allocated all our pages.
+ for (s32 index = heap_index; index >= 0 && num_pages > 0; index--) {
+ const size_t pages_per_alloc = KPageHeap::GetBlockNumPages(index);
+ for (Impl* cur_manager = this->GetFirstManager(pool, dir); cur_manager != nullptr;
+ cur_manager = this->GetNextManager(cur_manager, dir)) {
+ while (num_pages >= pages_per_alloc) {
+ // Allocate a block.
+ PAddr allocated_block = cur_manager->AllocateBlock(index, random);
+ if (allocated_block == 0) {
+ break;
+ }
- // Lock the pool that we're allocating from
- const auto pool_index{static_cast<std::size_t>(pool)};
- std::lock_guard lock{pool_locks[pool_index]};
+ // Safely add it to our group.
+ {
+ auto block_guard =
+ SCOPE_GUARD({ cur_manager->Free(allocated_block, pages_per_alloc); });
+ R_TRY(out->AddBlock(allocated_block, pages_per_alloc));
+ block_guard.Cancel();
+ }
- // Choose a heap based on our page size request
- const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)};
- if (heap_index < 0) {
- return ResultOutOfMemory;
+ num_pages -= pages_per_alloc;
+ }
+ }
}
- // TODO (bunnei): Support multiple managers
- Impl& chosen_manager{managers[pool_index]};
+ // Only succeed if we allocated as many pages as we wanted.
+ R_UNLESS(num_pages == 0, ResultOutOfMemory);
- // Ensure that we don't leave anything un-freed
- auto group_guard = detail::ScopeExit([&] {
- for (const auto& it : page_list.Nodes()) {
- const auto min_num_pages{std::min<size_t>(
- it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
- chosen_manager.Free(it.GetAddress(), min_num_pages);
- }
- });
+ // We succeeded!
+ group_guard.Cancel();
+ return ResultSuccess;
+}
- // Keep allocating until we've allocated all our pages
- for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) {
- const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)};
+ResultCode KMemoryManager::AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option) {
+ ASSERT(out != nullptr);
+ ASSERT(out->GetNumPages() == 0);
- while (num_pages >= pages_per_alloc) {
- // Allocate a block
- VAddr allocated_block{chosen_manager.AllocateBlock(index, false)};
- if (!allocated_block) {
- break;
- }
+ // Early return if we're allocating no pages.
+ R_SUCCEED_IF(num_pages == 0);
- // Safely add it to our group
- {
- auto block_guard = detail::ScopeExit(
- [&] { chosen_manager.Free(allocated_block, pages_per_alloc); });
+ // Lock the pool that we're allocating from.
+ const auto [pool, dir] = DecodeOption(option);
+ KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
+
+ // Allocate the page group.
+ R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
+
+ // Open the first reference to the pages.
+ for (const auto& block : out->Nodes()) {
+ PAddr cur_address = block.GetAddress();
+ size_t remaining_pages = block.GetNumPages();
+ while (remaining_pages > 0) {
+ // Get the manager for the current address.
+ auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
+
+ // Process part or all of the block.
+ const size_t cur_pages =
+ std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
+ manager.OpenFirst(cur_address, cur_pages);
+
+ // Advance.
+ cur_address += cur_pages * PageSize;
+ remaining_pages -= cur_pages;
+ }
+ }
- if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)};
- result.IsError()) {
- return result;
- }
+ return ResultSuccess;
+}
- block_guard.Cancel();
- }
+ResultCode KMemoryManager::AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages,
+ u32 option, u64 process_id, u8 fill_pattern) {
+ ASSERT(out != nullptr);
+ ASSERT(out->GetNumPages() == 0);
- num_pages -= pages_per_alloc;
- }
- }
+ // Decode the option.
+ const auto [pool, dir] = DecodeOption(option);
- // Clear allocated memory.
- for (const auto& it : page_list.Nodes()) {
- std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
- it.GetSize());
+ // Allocate the memory.
+ {
+ // Lock the pool that we're allocating from.
+ KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
+
+ // Allocate the page group.
+ R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
+
+ // Open the first reference to the pages.
+ for (const auto& block : out->Nodes()) {
+ PAddr cur_address = block.GetAddress();
+ size_t remaining_pages = block.GetNumPages();
+ while (remaining_pages > 0) {
+ // Get the manager for the current address.
+ auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
+
+ // Process part or all of the block.
+ const size_t cur_pages =
+ std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
+ manager.OpenFirst(cur_address, cur_pages);
+
+ // Advance.
+ cur_address += cur_pages * PageSize;
+ remaining_pages -= cur_pages;
+ }
+ }
}
- // Only succeed if we allocated as many pages as we wanted
- if (num_pages) {
- return ResultOutOfMemory;
+ // Set all the allocated memory.
+ for (const auto& block : out->Nodes()) {
+ std::memset(system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern,
+ block.GetSize());
}
- // We succeeded!
- group_guard.Cancel();
-
return ResultSuccess;
}
-ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
- Direction dir, u32 heap_fill_value) {
- // Early return if we're freeing no pages
- if (!num_pages) {
- return ResultSuccess;
+void KMemoryManager::Open(PAddr address, size_t num_pages) {
+ // Repeatedly open references until we've done so for all pages.
+ while (num_pages) {
+ auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
+ const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
+
+ {
+ KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
+ manager.Open(address, cur_pages);
+ }
+
+ num_pages -= cur_pages;
+ address += cur_pages * PageSize;
}
+}
- // Lock the pool that we're freeing from
- const auto pool_index{static_cast<std::size_t>(pool)};
- std::lock_guard lock{pool_locks[pool_index]};
+void KMemoryManager::Close(PAddr address, size_t num_pages) {
+ // Repeatedly close references until we've done so for all pages.
+ while (num_pages) {
+ auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
+ const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
- // TODO (bunnei): Support multiple managers
- Impl& chosen_manager{managers[pool_index]};
+ {
+ KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
+ manager.Close(address, cur_pages);
+ }
- // Free all of the pages
- for (const auto& it : page_list.Nodes()) {
- const auto min_num_pages{std::min<size_t>(
- it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
- chosen_manager.Free(it.GetAddress(), min_num_pages);
+ num_pages -= cur_pages;
+ address += cur_pages * PageSize;
}
+}
- return ResultSuccess;
+void KMemoryManager::Close(const KPageLinkedList& pg) {
+ for (const auto& node : pg.Nodes()) {
+ Close(node.GetAddress(), node.GetNumPages());
+ }
+}
+void KMemoryManager::Open(const KPageLinkedList& pg) {
+ for (const auto& node : pg.Nodes()) {
+ Open(node.GetAddress(), node.GetNumPages());
+ }
+}
+
+size_t KMemoryManager::Impl::Initialize(PAddr address, size_t size, VAddr management,
+ VAddr management_end, Pool p) {
+ // Calculate management sizes.
+ const size_t ref_count_size = (size / PageSize) * sizeof(u16);
+ const size_t optimize_map_size = CalculateOptimizedProcessOverheadSize(size);
+ const size_t manager_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
+ const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(size);
+ const size_t total_management_size = manager_size + page_heap_size;
+ ASSERT(manager_size <= total_management_size);
+ ASSERT(management + total_management_size <= management_end);
+ ASSERT(Common::IsAligned(total_management_size, PageSize));
+
+ // Setup region.
+ pool = p;
+ management_region = management;
+ page_reference_counts.resize(
+ Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize() / PageSize);
+ ASSERT(Common::IsAligned(management_region, PageSize));
+
+ // Initialize the manager's KPageHeap.
+ heap.Initialize(address, size, management + manager_size, page_heap_size);
+
+ return total_management_size;
}
-std::size_t KMemoryManager::Impl::CalculateManagementOverheadSize(std::size_t region_size) {
- const std::size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
- const std::size_t optimize_map_size =
+size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) {
+ const size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
+ const size_t optimize_map_size =
(Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
Common::BitSize<u64>()) *
sizeof(u64);
- const std::size_t manager_meta_size =
- Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
- const std::size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
+ const size_t manager_meta_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
+ const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
return manager_meta_size + page_heap_size;
}
diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h
index 17c7690f1..18775b262 100644
--- a/src/core/hle/kernel/k_memory_manager.h
+++ b/src/core/hle/kernel/k_memory_manager.h
@@ -5,11 +5,12 @@
#pragma once
#include <array>
-#include <mutex>
#include <tuple>
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "core/hle/kernel/k_light_lock.h"
+#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_page_heap.h"
#include "core/hle/result.h"
@@ -52,22 +53,33 @@ public:
explicit KMemoryManager(Core::System& system_);
- constexpr std::size_t GetSize(Pool pool) const {
- return managers[static_cast<std::size_t>(pool)].GetSize();
+ void Initialize(VAddr management_region, size_t management_region_size);
+
+ constexpr size_t GetSize(Pool pool) const {
+ constexpr Direction GetSizeDirection = Direction::FromFront;
+ size_t total = 0;
+ for (auto* manager = this->GetFirstManager(pool, GetSizeDirection); manager != nullptr;
+ manager = this->GetNextManager(manager, GetSizeDirection)) {
+ total += manager->GetSize();
+ }
+ return total;
}
- void InitializeManager(Pool pool, u64 start_address, u64 end_address);
+ PAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
+ ResultCode AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option);
+ ResultCode AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, u32 option,
+ u64 process_id, u8 fill_pattern);
+
+ static constexpr size_t MaxManagerCount = 10;
- VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
- ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
- u32 heap_fill_value = 0);
- ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
- u32 heap_fill_value = 0);
+ void Close(PAddr address, size_t num_pages);
+ void Close(const KPageLinkedList& pg);
- static constexpr std::size_t MaxManagerCount = 10;
+ void Open(PAddr address, size_t num_pages);
+ void Open(const KPageLinkedList& pg);
public:
- static std::size_t CalculateManagementOverheadSize(std::size_t region_size) {
+ static size_t CalculateManagementOverheadSize(size_t region_size) {
return Impl::CalculateManagementOverheadSize(region_size);
}
@@ -100,17 +112,26 @@ private:
Impl() = default;
~Impl() = default;
- std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address);
+ size_t Initialize(PAddr address, size_t size, VAddr management, VAddr management_end,
+ Pool p);
VAddr AllocateBlock(s32 index, bool random) {
return heap.AllocateBlock(index, random);
}
- void Free(VAddr addr, std::size_t num_pages) {
+ void Free(VAddr addr, size_t num_pages) {
heap.Free(addr, num_pages);
}
- constexpr std::size_t GetSize() const {
+ void SetInitialUsedHeapSize(size_t reserved_size) {
+ heap.SetInitialUsedSize(reserved_size);
+ }
+
+ constexpr Pool GetPool() const {
+ return pool;
+ }
+
+ constexpr size_t GetSize() const {
return heap.GetSize();
}
@@ -122,10 +143,88 @@ private:
return heap.GetEndAddress();
}
- static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
+ constexpr size_t GetPageOffset(PAddr address) const {
+ return heap.GetPageOffset(address);
+ }
+
+ constexpr size_t GetPageOffsetToEnd(PAddr address) const {
+ return heap.GetPageOffsetToEnd(address);
+ }
+
+ constexpr void SetNext(Impl* n) {
+ next = n;
+ }
+
+ constexpr void SetPrev(Impl* n) {
+ prev = n;
+ }
+
+ constexpr Impl* GetNext() const {
+ return next;
+ }
+
+ constexpr Impl* GetPrev() const {
+ return prev;
+ }
+
+ void OpenFirst(PAddr address, size_t num_pages) {
+ size_t index = this->GetPageOffset(address);
+ const size_t end = index + num_pages;
+ while (index < end) {
+ const RefCount ref_count = (++page_reference_counts[index]);
+ ASSERT(ref_count == 1);
- static constexpr std::size_t CalculateOptimizedProcessOverheadSize(
- std::size_t region_size) {
+ index++;
+ }
+ }
+
+ void Open(PAddr address, size_t num_pages) {
+ size_t index = this->GetPageOffset(address);
+ const size_t end = index + num_pages;
+ while (index < end) {
+ const RefCount ref_count = (++page_reference_counts[index]);
+ ASSERT(ref_count > 1);
+
+ index++;
+ }
+ }
+
+ void Close(PAddr address, size_t num_pages) {
+ size_t index = this->GetPageOffset(address);
+ const size_t end = index + num_pages;
+
+ size_t free_start = 0;
+ size_t free_count = 0;
+ while (index < end) {
+ ASSERT(page_reference_counts[index] > 0);
+ const RefCount ref_count = (--page_reference_counts[index]);
+
+ // Keep track of how many zero refcounts we see in a row, to minimize calls to free.
+ if (ref_count == 0) {
+ if (free_count > 0) {
+ free_count++;
+ } else {
+ free_start = index;
+ free_count = 1;
+ }
+ } else {
+ if (free_count > 0) {
+ this->Free(heap.GetAddress() + free_start * PageSize, free_count);
+ free_count = 0;
+ }
+ }
+
+ index++;
+ }
+
+ if (free_count > 0) {
+ this->Free(heap.GetAddress() + free_start * PageSize, free_count);
+ }
+ }
+
+ static size_t CalculateManagementOverheadSize(size_t region_size);
+
+ static constexpr size_t CalculateOptimizedProcessOverheadSize(size_t region_size) {
return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
Common::BitSize<u64>()) *
sizeof(u64);
@@ -135,13 +234,45 @@ private:
using RefCount = u16;
KPageHeap heap;
+ std::vector<RefCount> page_reference_counts;
+ VAddr management_region{};
Pool pool{};
+ Impl* next{};
+ Impl* prev{};
};
private:
+ Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) {
+ return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
+ }
+
+ const Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) const {
+ return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
+ }
+
+ constexpr Impl* GetFirstManager(Pool pool, Direction dir) const {
+ return dir == Direction::FromBack ? pool_managers_tail[static_cast<size_t>(pool)]
+ : pool_managers_head[static_cast<size_t>(pool)];
+ }
+
+ constexpr Impl* GetNextManager(Impl* cur, Direction dir) const {
+ if (dir == Direction::FromBack) {
+ return cur->GetPrev();
+ } else {
+ return cur->GetNext();
+ }
+ }
+
+ ResultCode AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
+ Direction dir, bool random);
+
+private:
Core::System& system;
- std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks;
+ std::array<KLightLock, static_cast<size_t>(Pool::Count)> pool_locks;
+ std::array<Impl*, MaxManagerCount> pool_managers_head{};
+ std::array<Impl*, MaxManagerCount> pool_managers_tail{};
std::array<Impl, MaxManagerCount> managers;
+ size_t num_managers{};
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/k_memory_region_type.h b/src/core/hle/kernel/k_memory_region_type.h
index a05e66677..0baeddf51 100644
--- a/src/core/hle/kernel/k_memory_region_type.h
+++ b/src/core/hle/kernel/k_memory_region_type.h
@@ -14,7 +14,8 @@
namespace Kernel {
enum KMemoryRegionType : u32 {
- KMemoryRegionAttr_CarveoutProtected = 0x04000000,
+ KMemoryRegionAttr_CarveoutProtected = 0x02000000,
+ KMemoryRegionAttr_Uncached = 0x04000000,
KMemoryRegionAttr_DidKernelMap = 0x08000000,
KMemoryRegionAttr_ShouldKernelMap = 0x10000000,
KMemoryRegionAttr_UserReadOnly = 0x20000000,
@@ -239,6 +240,11 @@ static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A);
static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A);
static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A);
+// UNUSED: .DeriveSparse(2, 2, 0);
+constexpr auto KMemoryRegionType_VirtualDramUnknownDebug =
+ KMemoryRegionType_Dram.DeriveSparse(2, 2, 1);
+static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52));
+
constexpr auto KMemoryRegionType_VirtualDramKernelInitPt =
KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0);
constexpr auto KMemoryRegionType_VirtualDramPoolManagement =
@@ -330,6 +336,8 @@ constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) {
return KMemoryRegionType_VirtualDramKernelTraceBuffer;
} else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) {
return KMemoryRegionType_VirtualDramKernelPtHeap;
+ } else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) {
+ return KMemoryRegionType_VirtualDramUnknownDebug;
} else {
return KMemoryRegionType_Dram;
}
diff --git a/src/core/hle/kernel/k_page_buffer.h b/src/core/hle/kernel/k_page_buffer.h
new file mode 100644
index 000000000..0a9451228
--- /dev/null
+++ b/src/core/hle/kernel/k_page_buffer.h
@@ -0,0 +1,34 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/device_memory.h"
+#include "core/hle/kernel/memory_types.h"
+
+namespace Kernel {
+
+class KPageBuffer final : public KSlabAllocated<KPageBuffer> {
+public:
+ KPageBuffer() = default;
+
+ static KPageBuffer* FromPhysicalAddress(Core::System& system, PAddr phys_addr) {
+ ASSERT(Common::IsAligned(phys_addr, PageSize));
+ return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr));
+ }
+
+private:
+ [[maybe_unused]] alignas(PageSize) std::array<u8, PageSize> m_buffer{};
+};
+
+static_assert(sizeof(KPageBuffer) == PageSize);
+static_assert(alignof(KPageBuffer) == PageSize);
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp
index 29d996d62..97a5890a0 100644
--- a/src/core/hle/kernel/k_page_heap.cpp
+++ b/src/core/hle/kernel/k_page_heap.cpp
@@ -7,35 +7,51 @@
namespace Kernel {
-void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) {
- // Check our assumptions
- ASSERT(Common::IsAligned((address), PageSize));
+void KPageHeap::Initialize(PAddr address, size_t size, VAddr management_address,
+ size_t management_size, const size_t* block_shifts,
+ size_t num_block_shifts) {
+ // Check our assumptions.
+ ASSERT(Common::IsAligned(address, PageSize));
ASSERT(Common::IsAligned(size, PageSize));
+ ASSERT(0 < num_block_shifts && num_block_shifts <= NumMemoryBlockPageShifts);
+ const VAddr management_end = management_address + management_size;
- // Set our members
- heap_address = address;
- heap_size = size;
-
- // Setup bitmaps
- metadata.resize(metadata_size / sizeof(u64));
- u64* cur_bitmap_storage{metadata.data()};
- for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
- const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
- const std::size_t next_block_shift{
- (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
- cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift,
- next_block_shift, cur_bitmap_storage);
+ // Set our members.
+ m_heap_address = address;
+ m_heap_size = size;
+ m_num_blocks = num_block_shifts;
+
+ // Setup bitmaps.
+ m_management_data.resize(management_size / sizeof(u64));
+ u64* cur_bitmap_storage{m_management_data.data()};
+ for (size_t i = 0; i < num_block_shifts; i++) {
+ const size_t cur_block_shift = block_shifts[i];
+ const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
+ cur_bitmap_storage = m_blocks[i].Initialize(m_heap_address, m_heap_size, cur_block_shift,
+ next_block_shift, cur_bitmap_storage);
}
+
+ // Ensure we didn't overextend our bounds.
+ ASSERT(VAddr(cur_bitmap_storage) <= management_end);
+}
+
+size_t KPageHeap::GetNumFreePages() const {
+ size_t num_free = 0;
+
+ for (size_t i = 0; i < m_num_blocks; i++) {
+ num_free += m_blocks[i].GetNumFreePages();
+ }
+
+ return num_free;
}
-VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
- const std::size_t needed_size{blocks[index].GetSize()};
+PAddr KPageHeap::AllocateBlock(s32 index, bool random) {
+ const size_t needed_size = m_blocks[index].GetSize();
- for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) {
- if (const VAddr addr{blocks[i].PopBlock(random)}; addr) {
- if (const std::size_t allocated_size{blocks[i].GetSize()};
- allocated_size > needed_size) {
- Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
+ for (s32 i = index; i < static_cast<s32>(m_num_blocks); i++) {
+ if (const PAddr addr = m_blocks[i].PopBlock(random); addr != 0) {
+ if (const size_t allocated_size = m_blocks[i].GetSize(); allocated_size > needed_size) {
+ this->Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
}
return addr;
}
@@ -44,34 +60,34 @@ VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
return 0;
}
-void KPageHeap::FreeBlock(VAddr block, s32 index) {
+void KPageHeap::FreeBlock(PAddr block, s32 index) {
do {
- block = blocks[index++].PushBlock(block);
+ block = m_blocks[index++].PushBlock(block);
} while (block != 0);
}
-void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
- // Freeing no pages is a no-op
+void KPageHeap::Free(PAddr addr, size_t num_pages) {
+ // Freeing no pages is a no-op.
if (num_pages == 0) {
return;
}
- // Find the largest block size that we can free, and free as many as possible
- s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1};
- const VAddr start{addr};
- const VAddr end{(num_pages * PageSize) + addr};
- VAddr before_start{start};
- VAddr before_end{start};
- VAddr after_start{end};
- VAddr after_end{end};
+ // Find the largest block size that we can free, and free as many as possible.
+ s32 big_index = static_cast<s32>(m_num_blocks) - 1;
+ const PAddr start = addr;
+ const PAddr end = addr + num_pages * PageSize;
+ PAddr before_start = start;
+ PAddr before_end = start;
+ PAddr after_start = end;
+ PAddr after_end = end;
while (big_index >= 0) {
- const std::size_t block_size{blocks[big_index].GetSize()};
- const VAddr big_start{Common::AlignUp((start), block_size)};
- const VAddr big_end{Common::AlignDown((end), block_size)};
+ const size_t block_size = m_blocks[big_index].GetSize();
+ const PAddr big_start = Common::AlignUp(start, block_size);
+ const PAddr big_end = Common::AlignDown(end, block_size);
if (big_start < big_end) {
- // Free as many big blocks as we can
- for (auto block{big_start}; block < big_end; block += block_size) {
- FreeBlock(block, big_index);
+ // Free as many big blocks as we can.
+ for (auto block = big_start; block < big_end; block += block_size) {
+ this->FreeBlock(block, big_index);
}
before_end = big_start;
after_start = big_end;
@@ -81,31 +97,31 @@ void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
}
ASSERT(big_index >= 0);
- // Free space before the big blocks
- for (s32 i{big_index - 1}; i >= 0; i--) {
- const std::size_t block_size{blocks[i].GetSize()};
+ // Free space before the big blocks.
+ for (s32 i = big_index - 1; i >= 0; i--) {
+ const size_t block_size = m_blocks[i].GetSize();
while (before_start + block_size <= before_end) {
before_end -= block_size;
- FreeBlock(before_end, i);
+ this->FreeBlock(before_end, i);
}
}
- // Free space after the big blocks
- for (s32 i{big_index - 1}; i >= 0; i--) {
- const std::size_t block_size{blocks[i].GetSize()};
+ // Free space after the big blocks.
+ for (s32 i = big_index - 1; i >= 0; i--) {
+ const size_t block_size = m_blocks[i].GetSize();
while (after_start + block_size <= after_end) {
- FreeBlock(after_start, i);
+ this->FreeBlock(after_start, i);
after_start += block_size;
}
}
}
-std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) {
- std::size_t overhead_size = 0;
- for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
- const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
- const std::size_t next_block_shift{
- (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
+size_t KPageHeap::CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
+ size_t num_block_shifts) {
+ size_t overhead_size = 0;
+ for (size_t i = 0; i < num_block_shifts; i++) {
+ const size_t cur_block_shift = block_shifts[i];
+ const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
overhead_size += KPageHeap::Block::CalculateManagementOverheadSize(
region_size, cur_block_shift, next_block_shift);
}
diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h
index a65aa28a0..60fff766b 100644
--- a/src/core/hle/kernel/k_page_heap.h
+++ b/src/core/hle/kernel/k_page_heap.h
@@ -23,54 +23,73 @@ public:
KPageHeap() = default;
~KPageHeap() = default;
- constexpr VAddr GetAddress() const {
- return heap_address;
+ constexpr PAddr GetAddress() const {
+ return m_heap_address;
}
- constexpr std::size_t GetSize() const {
- return heap_size;
+ constexpr size_t GetSize() const {
+ return m_heap_size;
}
- constexpr VAddr GetEndAddress() const {
- return GetAddress() + GetSize();
+ constexpr PAddr GetEndAddress() const {
+ return this->GetAddress() + this->GetSize();
}
- constexpr std::size_t GetPageOffset(VAddr block) const {
- return (block - GetAddress()) / PageSize;
+ constexpr size_t GetPageOffset(PAddr block) const {
+ return (block - this->GetAddress()) / PageSize;
+ }
+ constexpr size_t GetPageOffsetToEnd(PAddr block) const {
+ return (this->GetEndAddress() - block) / PageSize;
+ }
+
+ void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
+ size_t management_size) {
+ return this->Initialize(heap_address, heap_size, management_address, management_size,
+ MemoryBlockPageShifts.data(), NumMemoryBlockPageShifts);
+ }
+
+ size_t GetFreeSize() const {
+ return this->GetNumFreePages() * PageSize;
}
- void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size);
- VAddr AllocateBlock(s32 index, bool random);
- void Free(VAddr addr, std::size_t num_pages);
+ void SetInitialUsedSize(size_t reserved_size) {
+ // Check that the reserved size is valid.
+ const size_t free_size = this->GetNumFreePages() * PageSize;
+ ASSERT(m_heap_size >= free_size + reserved_size);
- void UpdateUsedSize() {
- used_size = heap_size - (GetNumFreePages() * PageSize);
+ // Set the initial used size.
+ m_initial_used_size = m_heap_size - free_size - reserved_size;
}
- static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
+ PAddr AllocateBlock(s32 index, bool random);
+ void Free(PAddr addr, size_t num_pages);
+
+ static size_t CalculateManagementOverheadSize(size_t region_size) {
+ return CalculateManagementOverheadSize(region_size, MemoryBlockPageShifts.data(),
+ NumMemoryBlockPageShifts);
+ }
- static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
- const auto target_pages{std::max(num_pages, align_pages)};
- for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
- if (target_pages <=
- (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+ static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) {
+ const size_t target_pages = std::max(num_pages, align_pages);
+ for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
+ if (target_pages <= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
return static_cast<s32>(i);
}
}
return -1;
}
- static constexpr s32 GetBlockIndex(std::size_t num_pages) {
- for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
- if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+ static constexpr s32 GetBlockIndex(size_t num_pages) {
+ for (s32 i = static_cast<s32>(NumMemoryBlockPageShifts) - 1; i >= 0; i--) {
+ if (num_pages >= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
return i;
}
}
return -1;
}
- static constexpr std::size_t GetBlockSize(std::size_t index) {
- return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
+ static constexpr size_t GetBlockSize(size_t index) {
+ return size_t(1) << MemoryBlockPageShifts[index];
}
- static constexpr std::size_t GetBlockNumPages(std::size_t index) {
+ static constexpr size_t GetBlockNumPages(size_t index) {
return GetBlockSize(index) / PageSize;
}
@@ -83,114 +102,116 @@ private:
Block() = default;
~Block() = default;
- constexpr std::size_t GetShift() const {
- return block_shift;
+ constexpr size_t GetShift() const {
+ return m_block_shift;
}
- constexpr std::size_t GetNextShift() const {
- return next_block_shift;
+ constexpr size_t GetNextShift() const {
+ return m_next_block_shift;
}
- constexpr std::size_t GetSize() const {
- return static_cast<std::size_t>(1) << GetShift();
+ constexpr size_t GetSize() const {
+ return u64(1) << this->GetShift();
}
- constexpr std::size_t GetNumPages() const {
- return GetSize() / PageSize;
+ constexpr size_t GetNumPages() const {
+ return this->GetSize() / PageSize;
}
- constexpr std::size_t GetNumFreeBlocks() const {
- return bitmap.GetNumBits();
+ constexpr size_t GetNumFreeBlocks() const {
+ return m_bitmap.GetNumBits();
}
- constexpr std::size_t GetNumFreePages() const {
- return GetNumFreeBlocks() * GetNumPages();
+ constexpr size_t GetNumFreePages() const {
+ return this->GetNumFreeBlocks() * this->GetNumPages();
}
- u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs,
- u64* bit_storage) {
- // Set shifts
- block_shift = bs;
- next_block_shift = nbs;
-
- // Align up the address
- VAddr end{addr + size};
- const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift)
- : (1ULL << block_shift)};
- addr = Common::AlignDown((addr), align);
- end = Common::AlignUp((end), align);
-
- heap_address = addr;
- end_offset = (end - addr) / (1ULL << block_shift);
- return bitmap.Initialize(bit_storage, end_offset);
+ u64* Initialize(PAddr addr, size_t size, size_t bs, size_t nbs, u64* bit_storage) {
+ // Set shifts.
+ m_block_shift = bs;
+ m_next_block_shift = nbs;
+
+ // Align up the address.
+ PAddr end = addr + size;
+ const size_t align = (m_next_block_shift != 0) ? (u64(1) << m_next_block_shift)
+ : (u64(1) << m_block_shift);
+ addr = Common::AlignDown(addr, align);
+ end = Common::AlignUp(end, align);
+
+ m_heap_address = addr;
+ m_end_offset = (end - addr) / (u64(1) << m_block_shift);
+ return m_bitmap.Initialize(bit_storage, m_end_offset);
}
- VAddr PushBlock(VAddr address) {
- // Set the bit for the free block
- std::size_t offset{(address - heap_address) >> GetShift()};
- bitmap.SetBit(offset);
+ PAddr PushBlock(PAddr address) {
+ // Set the bit for the free block.
+ size_t offset = (address - m_heap_address) >> this->GetShift();
+ m_bitmap.SetBit(offset);
- // If we have a next shift, try to clear the blocks below and return the address
- if (GetNextShift()) {
- const auto diff{1ULL << (GetNextShift() - GetShift())};
+ // If we have a next shift, try to clear the blocks below this one and return the new
+ // address.
+ if (this->GetNextShift()) {
+ const size_t diff = u64(1) << (this->GetNextShift() - this->GetShift());
offset = Common::AlignDown(offset, diff);
- if (bitmap.ClearRange(offset, diff)) {
- return heap_address + (offset << GetShift());
+ if (m_bitmap.ClearRange(offset, diff)) {
+ return m_heap_address + (offset << this->GetShift());
}
}
- // We couldn't coalesce, or we're already as big as possible
- return 0;
+ // We couldn't coalesce, or we're already as big as possible.
+ return {};
}
- VAddr PopBlock(bool random) {
- // Find a free block
- const s64 soffset{bitmap.FindFreeBlock(random)};
+ PAddr PopBlock(bool random) {
+ // Find a free block.
+ s64 soffset = m_bitmap.FindFreeBlock(random);
if (soffset < 0) {
- return 0;
+ return {};
}
- const auto offset{static_cast<std::size_t>(soffset)};
+ const size_t offset = static_cast<size_t>(soffset);
- // Update our tracking and return it
- bitmap.ClearBit(offset);
- return heap_address + (offset << GetShift());
+ // Update our tracking and return it.
+ m_bitmap.ClearBit(offset);
+ return m_heap_address + (offset << this->GetShift());
}
- static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size,
- std::size_t cur_block_shift,
- std::size_t next_block_shift) {
- const auto cur_block_size{(1ULL << cur_block_shift)};
- const auto next_block_size{(1ULL << next_block_shift)};
- const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size};
+ public:
+ static constexpr size_t CalculateManagementOverheadSize(size_t region_size,
+ size_t cur_block_shift,
+ size_t next_block_shift) {
+ const size_t cur_block_size = (u64(1) << cur_block_shift);
+ const size_t next_block_size = (u64(1) << next_block_shift);
+ const size_t align = (next_block_shift != 0) ? next_block_size : cur_block_size;
return KPageBitmap::CalculateManagementOverheadSize(
(align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
}
private:
- KPageBitmap bitmap;
- VAddr heap_address{};
- uintptr_t end_offset{};
- std::size_t block_shift{};
- std::size_t next_block_shift{};
+ KPageBitmap m_bitmap;
+ PAddr m_heap_address{};
+ uintptr_t m_end_offset{};
+ size_t m_block_shift{};
+ size_t m_next_block_shift{};
};
- constexpr std::size_t GetNumFreePages() const {
- std::size_t num_free{};
-
- for (const auto& block : blocks) {
- num_free += block.GetNumFreePages();
- }
-
- return num_free;
- }
+private:
+ void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
+ size_t management_size, const size_t* block_shifts, size_t num_block_shifts);
+ size_t GetNumFreePages() const;
- void FreeBlock(VAddr block, s32 index);
+ void FreeBlock(PAddr block, s32 index);
- static constexpr std::size_t NumMemoryBlockPageShifts{7};
- static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
+ static constexpr size_t NumMemoryBlockPageShifts{7};
+ static constexpr std::array<size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
};
- VAddr heap_address{};
- std::size_t heap_size{};
- std::size_t used_size{};
- std::array<Block, NumMemoryBlockPageShifts> blocks{};
- std::vector<u64> metadata;
+private:
+ static size_t CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
+ size_t num_block_shifts);
+
+private:
+ PAddr m_heap_address{};
+ size_t m_heap_size{};
+ size_t m_initial_used_size{};
+ size_t m_num_blocks{};
+ std::array<Block, NumMemoryBlockPageShifts> m_blocks{};
+ std::vector<u64> m_management_data;
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 88aa2a152..02d93b12e 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -273,87 +273,219 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
KMemoryPermission::None, KMemoryPermission::None,
KMemoryAttribute::None, KMemoryAttribute::None));
+ KPageLinkedList pg;
+ R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
+ &pg, num_pages,
+ KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, allocation_option)));
- KPageLinkedList page_linked_list;
- R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
- allocation_option));
- R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
+ R_TRY(Operate(addr, num_pages, pg, OperationType::MapGroup));
block_manager->Update(addr, num_pages, state, perm);
return ResultSuccess;
}
-ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+ // Validate the mapping request.
+ R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+ ResultInvalidMemoryRegion);
+
+ // Lock the table.
KScopedLightLock lk(general_lock);
- const std::size_t num_pages{size / PageSize};
+ // Verify that the source memory is normal heap.
+ KMemoryState src_state{};
+ KMemoryPermission src_perm{};
+ std::size_t num_src_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
+ src_address, size, KMemoryState::All, KMemoryState::Normal,
+ KMemoryPermission::All, KMemoryPermission::UserReadWrite,
+ KMemoryAttribute::All, KMemoryAttribute::None));
- KMemoryState state{};
- KMemoryPermission perm{};
- CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
- KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
- KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
- KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
+ // Verify that the destination memory is unmapped.
+ std::size_t num_dst_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
+ KMemoryState::Free, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::None,
+ KMemoryAttribute::None));
- if (IsRegionMapped(dst_addr, size)) {
- return ResultInvalidCurrentMemory;
- }
+ // Map the code memory.
+ {
+ // Determine the number of pages being operated on.
+ const std::size_t num_pages = size / PageSize;
- KPageLinkedList page_linked_list;
- AddRegionToPages(src_addr, num_pages, page_linked_list);
+ // Create page groups for the memory being mapped.
+ KPageLinkedList pg;
+ AddRegionToPages(src_address, num_pages, pg);
- {
- auto block_guard = detail::ScopeExit(
- [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
+ // Reprotect the source as kernel-read/not mapped.
+ const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
+ KMemoryPermission::NotMapped);
+ R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
- CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
- OperationType::ChangePermissions));
- CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
+ // Ensure that we unprotect the source pages on failure.
+ auto unprot_guard = SCOPE_GUARD({
+ ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
+ .IsSuccess());
+ });
- block_guard.Cancel();
- }
+ // Map the alias pages.
+ R_TRY(MapPages(dst_address, pg, new_perm));
- block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
- KMemoryAttribute::Locked);
- block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
+ // We successfully mapped the alias pages, so we don't need to unprotect the src pages on
+ // failure.
+ unprot_guard.Cancel();
+
+ // Apply the memory block updates.
+ block_manager->Update(src_address, num_pages, src_state, new_perm,
+ KMemoryAttribute::Locked);
+ block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
+ KMemoryAttribute::None);
+ }
return ResultSuccess;
}
-ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+ // Validate the mapping request.
+ R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+ ResultInvalidMemoryRegion);
+
+ // Lock the table.
KScopedLightLock lk(general_lock);
- if (!size) {
- return ResultSuccess;
+ // Verify that the source memory is locked normal heap.
+ std::size_t num_src_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
+ KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::All,
+ KMemoryAttribute::Locked));
+
+ // Verify that the destination memory is aliasable code.
+ std::size_t num_dst_allocator_blocks{};
+ R_TRY(this->CheckMemoryStateContiguous(
+ std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
+ KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::All, KMemoryAttribute::None));
+
+ // Determine whether any pages being unmapped are code.
+ bool any_code_pages = false;
+ {
+ KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
+ while (true) {
+ // Get the memory info.
+ const KMemoryInfo info = it->GetMemoryInfo();
+
+ // Check if the memory has code flag.
+ if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
+ any_code_pages = true;
+ break;
+ }
+
+ // Check if we're done.
+ if (dst_address + size - 1 <= info.GetLastAddress()) {
+ break;
+ }
+
+ // Advance.
+ ++it;
+ }
}
- const std::size_t num_pages{size / PageSize};
+ // Ensure that we maintain the instruction cache.
+ bool reprotected_pages = false;
+ SCOPE_EXIT({
+ if (reprotected_pages && any_code_pages) {
+ system.InvalidateCpuInstructionCacheRange(dst_address, size);
+ }
+ });
- CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
- KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
- KMemoryPermission::None, KMemoryAttribute::Mask,
- KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
+ // Unmap.
+ {
+ // Determine the number of pages being operated on.
+ const std::size_t num_pages = size / PageSize;
- KMemoryState state{};
- CASCADE_CODE(CheckMemoryState(
- &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
- KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
- KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
- CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
- KMemoryPermission::None, KMemoryAttribute::Mask,
- KMemoryAttribute::None));
- CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
+ // Unmap the aliased copy of the pages.
+ R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
- block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
- block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
- KMemoryPermission::UserReadWrite);
+ // Try to set the permissions for the source pages back to what they should be.
+ R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
+ OperationType::ChangePermissions));
- system.InvalidateCpuInstructionCacheRange(dst_addr, size);
+ // Apply the memory block updates.
+ block_manager->Update(dst_address, num_pages, KMemoryState::None);
+ block_manager->Update(src_address, num_pages, KMemoryState::Normal,
+ KMemoryPermission::UserReadWrite);
+
+ // Note that we reprotected pages.
+ reprotected_pages = true;
+ }
return ResultSuccess;
}
+VAddr KPageTable::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
+ std::size_t num_pages, std::size_t alignment, std::size_t offset,
+ std::size_t guard_pages) {
+ VAddr address = 0;
+
+ if (num_pages <= region_num_pages) {
+ if (this->IsAslrEnabled()) {
+ // Try to directly find a free area up to 8 times.
+ for (std::size_t i = 0; i < 8; i++) {
+ const std::size_t random_offset =
+ KSystemControl::GenerateRandomRange(
+ 0, (region_num_pages - num_pages - guard_pages) * PageSize / alignment) *
+ alignment;
+ const VAddr candidate =
+ Common::AlignDown((region_start + random_offset), alignment) + offset;
+
+ KMemoryInfo info = this->QueryInfoImpl(candidate);
+
+ if (info.state != KMemoryState::Free) {
+ continue;
+ }
+ if (region_start > candidate) {
+ continue;
+ }
+ if (info.GetAddress() + guard_pages * PageSize > candidate) {
+ continue;
+ }
+
+ const VAddr candidate_end = candidate + (num_pages + guard_pages) * PageSize - 1;
+ if (candidate_end > info.GetLastAddress()) {
+ continue;
+ }
+ if (candidate_end > region_start + region_num_pages * PageSize - 1) {
+ continue;
+ }
+
+ address = candidate;
+ break;
+ }
+ // Fall back to finding the first free area with a random offset.
+ if (address == 0) {
+ // NOTE: Nintendo does not account for guard pages here.
+ // This may theoretically cause an offset to be chosen that cannot be mapped. We
+ // will account for guard pages.
+ const std::size_t offset_pages = KSystemControl::GenerateRandomRange(
+ 0, region_num_pages - num_pages - guard_pages);
+ address = block_manager->FindFreeArea(region_start + offset_pages * PageSize,
+ region_num_pages - offset_pages, num_pages,
+ alignment, offset, guard_pages);
+ }
+ }
+
+ // Find the first free area.
+ if (address == 0) {
+ address = block_manager->FindFreeArea(region_start, region_num_pages, num_pages,
+ alignment, offset, guard_pages);
+ }
+ }
+
+ return address;
+}
+
ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
KPageTable& src_page_table, VAddr src_addr) {
KScopedLightLock lk(general_lock);
@@ -443,9 +575,10 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the new memory.
- KPageLinkedList page_linked_list;
- R_TRY(system.Kernel().MemoryManager().Allocate(
- page_linked_list, (size - mapped_size) / PageSize, memory_pool, allocation_option));
+ KPageLinkedList pg;
+ R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
+ &pg, (size - mapped_size) / PageSize,
+ KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
// Map the memory.
{
@@ -547,7 +680,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
});
// Iterate over the memory.
- auto pg_it = page_linked_list.Nodes().begin();
+ auto pg_it = pg.Nodes().begin();
PAddr pg_phys_addr = pg_it->GetAddress();
size_t pg_pages = pg_it->GetNumPages();
@@ -571,7 +704,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
// Check if we're at the end of the physical block.
if (pg_pages == 0) {
// Ensure there are more pages to map.
- ASSERT(pg_it != page_linked_list.Nodes().end());
+ ASSERT(pg_it != pg.Nodes().end());
// Advance our physical block.
++pg_it;
@@ -841,10 +974,14 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr address, std::size_t size) {
process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
// Update memory blocks.
- system.Kernel().MemoryManager().Free(pg, size / PageSize, memory_pool, allocation_option);
block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None,
KMemoryAttribute::None);
+ // TODO(bunnei): This is a workaround until the next set of changes, where we add reference
+ // counting for mapped pages. Until then, we must manually close the reference to the page
+ // group.
+ system.Kernel().MemoryManager().Close(pg);
+
// We succeeded.
remap_guard.Cancel();
@@ -980,6 +1117,46 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list
return ResultSuccess;
}
+ResultCode KPageTable::MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+ PAddr phys_addr, bool is_pa_valid, VAddr region_start,
+ std::size_t region_num_pages, KMemoryState state,
+ KMemoryPermission perm) {
+ ASSERT(Common::IsAligned(alignment, PageSize) && alignment >= PageSize);
+
+ // Ensure this is a valid map request.
+ R_UNLESS(this->CanContain(region_start, region_num_pages * PageSize, state),
+ ResultInvalidCurrentMemory);
+ R_UNLESS(num_pages < region_num_pages, ResultOutOfMemory);
+
+ // Lock the table.
+ KScopedLightLock lk(general_lock);
+
+ // Find a random address to map at.
+ VAddr addr = this->FindFreeArea(region_start, region_num_pages, num_pages, alignment, 0,
+ this->GetNumGuardPages());
+ R_UNLESS(addr != 0, ResultOutOfMemory);
+ ASSERT(Common::IsAligned(addr, alignment));
+ ASSERT(this->CanContain(addr, num_pages * PageSize, state));
+ ASSERT(this->CheckMemoryState(addr, num_pages * PageSize, KMemoryState::All, KMemoryState::Free,
+ KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::None, KMemoryAttribute::None)
+ .IsSuccess());
+
+ // Perform mapping operation.
+ if (is_pa_valid) {
+ R_TRY(this->Operate(addr, num_pages, perm, OperationType::Map, phys_addr));
+ } else {
+ UNIMPLEMENTED();
+ }
+
+ // Update the blocks.
+ block_manager->Update(addr, num_pages, state, perm);
+
+ // We successfully mapped the pages.
+ *out_addr = addr;
+ return ResultSuccess;
+}
+
ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) {
ASSERT(this->IsLockedByCurrentThread());
@@ -1022,6 +1199,30 @@ ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list,
return ResultSuccess;
}
+ResultCode KPageTable::UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state) {
+ // Check that the unmap is in range.
+ const std::size_t size = num_pages * PageSize;
+ R_UNLESS(this->Contains(address, size), ResultInvalidCurrentMemory);
+
+ // Lock the table.
+ KScopedLightLock lk(general_lock);
+
+ // Check the memory state.
+ std::size_t num_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), address, size,
+ KMemoryState::All, state, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::All,
+ KMemoryAttribute::None));
+
+ // Perform the unmap.
+ R_TRY(Operate(address, num_pages, KMemoryPermission::None, OperationType::Unmap));
+
+ // Update the blocks.
+ block_manager->Update(address, num_pages, KMemoryState::Free, KMemoryPermission::None);
+
+ return ResultSuccess;
+}
+
ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
Svc::MemoryPermission svc_perm) {
const size_t num_pages = size / PageSize;
@@ -1270,9 +1471,16 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the heap extension.
- KPageLinkedList page_linked_list;
- R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
- memory_pool, allocation_option));
+ KPageLinkedList pg;
+ R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
+ &pg, allocation_size / PageSize,
+ KMemoryManager::EncodeOption(memory_pool, allocation_option)));
+
+ // Clear all the newly allocated pages.
+ for (const auto& it : pg.Nodes()) {
+ std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
+ it.GetSize());
+ }
// Map the pages.
{
@@ -1291,7 +1499,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
// Map the pages.
const auto num_pages = allocation_size / PageSize;
- R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
+ R_TRY(Operate(current_heap_end, num_pages, pg, OperationType::MapGroup));
// Clear all the newly allocated pages.
for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
@@ -1339,8 +1547,9 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
} else {
KPageLinkedList page_group;
- R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool,
- allocation_option));
+ R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
+ &page_group, needed_num_pages,
+ KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
}
@@ -1547,7 +1756,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermiss
return ResultSuccess;
}
-constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
+VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
switch (state) {
case KMemoryState::Free:
case KMemoryState::Kernel:
@@ -1583,7 +1792,7 @@ constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
}
}
-constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
+std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
switch (state) {
case KMemoryState::Free:
case KMemoryState::Kernel:
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index c98887d34..54c6adf8d 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -36,8 +36,8 @@ public:
KMemoryManager::Pool pool);
ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
KMemoryPermission perm);
- ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
- ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
+ ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
+ ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
VAddr src_addr);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -46,7 +46,14 @@ public:
ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
KMemoryPermission perm);
+ ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+ PAddr phys_addr, KMemoryState state, KMemoryPermission perm) {
+ return this->MapPages(out_addr, num_pages, alignment, phys_addr, true,
+ this->GetRegionAddress(state), this->GetRegionSize(state) / PageSize,
+ state, perm);
+ }
ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state);
+ ResultCode UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state);
ResultCode SetProcessMemoryPermission(VAddr addr, std::size_t size,
Svc::MemoryPermission svc_perm);
KMemoryInfo QueryInfo(VAddr addr);
@@ -91,6 +98,9 @@ private:
ResultCode InitializeMemoryLayout(VAddr start, VAddr end);
ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
KMemoryPermission perm);
+ ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
+ PAddr phys_addr, bool is_pa_valid, VAddr region_start,
+ std::size_t region_num_pages, KMemoryState state, KMemoryPermission perm);
ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list);
bool IsRegionMapped(VAddr address, u64 size);
bool IsRegionContiguous(VAddr addr, u64 size) const;
@@ -102,8 +112,11 @@ private:
OperationType operation);
ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
OperationType operation, PAddr map_addr = 0);
- constexpr VAddr GetRegionAddress(KMemoryState state) const;
- constexpr std::size_t GetRegionSize(KMemoryState state) const;
+ VAddr GetRegionAddress(KMemoryState state) const;
+ std::size_t GetRegionSize(KMemoryState state) const;
+
+ VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages,
+ std::size_t alignment, std::size_t offset, std::size_t guard_pages);
ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr,
std::size_t size, KMemoryState state_mask,
@@ -137,7 +150,7 @@ private:
return CheckMemoryState(nullptr, nullptr, nullptr, out_blocks_needed, addr, size,
state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr);
}
- ResultCode CheckMemoryState(VAddr addr, size_t size, KMemoryState state_mask,
+ ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask,
KMemoryState state, KMemoryPermission perm_mask,
KMemoryPermission perm, KMemoryAttribute attr_mask,
KMemoryAttribute attr,
@@ -210,7 +223,7 @@ public:
constexpr VAddr GetAliasCodeRegionSize() const {
return alias_code_region_end - alias_code_region_start;
}
- size_t GetNormalMemorySize() {
+ std::size_t GetNormalMemorySize() {
KScopedLightLock lk(general_lock);
return GetHeapSize() + mapped_physical_memory_size;
}
@@ -253,9 +266,10 @@ public:
constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const {
return !IsOutsideASLRRegion(address, size);
}
-
- PAddr GetPhysicalAddr(VAddr addr) {
- ASSERT(IsLockedByCurrentThread());
+ constexpr std::size_t GetNumGuardPages() const {
+ return IsKernel() ? 1 : 4;
+ }
+ PAddr GetPhysicalAddr(VAddr addr) const {
const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
ASSERT(backing_addr);
return backing_addr + addr;
@@ -276,10 +290,6 @@ private:
return is_aslr_enabled;
}
- constexpr std::size_t GetNumGuardPages() const {
- return IsKernel() ? 1 : 4;
- }
-
constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const {
return (address_space_start <= addr) &&
(num_pages <= (address_space_end - address_space_start) / PageSize) &&
@@ -311,6 +321,8 @@ private:
bool is_kernel{};
bool is_aslr_enabled{};
+ u32 heap_fill_value{};
+
KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront};
diff --git a/src/core/hle/kernel/k_port.cpp b/src/core/hle/kernel/k_port.cpp
index a8ba09c4a..ceb98709f 100644
--- a/src/core/hle/kernel/k_port.cpp
+++ b/src/core/hle/kernel/k_port.cpp
@@ -57,7 +57,12 @@ ResultCode KPort::EnqueueSession(KServerSession* session) {
R_UNLESS(state == State::Normal, ResultPortClosed);
server.EnqueueSession(session);
- server.GetSessionRequestHandler()->ClientConnected(server.AcceptSession());
+
+ if (auto session_ptr = server.GetSessionRequestHandler().lock()) {
+ session_ptr->ClientConnected(server.AcceptSession());
+ } else {
+ UNREACHABLE();
+ }
return ResultSuccess;
}
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 9233261cd..b39405496 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -70,58 +70,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
}
} // Anonymous namespace
-// Represents a page used for thread-local storage.
-//
-// Each TLS page contains slots that may be used by processes and threads.
-// Every process and thread is created with a slot in some arbitrary page
-// (whichever page happens to have an available slot).
-class TLSPage {
-public:
- static constexpr std::size_t num_slot_entries =
- Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE;
-
- explicit TLSPage(VAddr address) : base_address{address} {}
-
- bool HasAvailableSlots() const {
- return !is_slot_used.all();
- }
-
- VAddr GetBaseAddress() const {
- return base_address;
- }
-
- std::optional<VAddr> ReserveSlot() {
- for (std::size_t i = 0; i < is_slot_used.size(); i++) {
- if (is_slot_used[i]) {
- continue;
- }
-
- is_slot_used[i] = true;
- return base_address + (i * Core::Memory::TLS_ENTRY_SIZE);
- }
-
- return std::nullopt;
- }
-
- void ReleaseSlot(VAddr address) {
- // Ensure that all given addresses are consistent with how TLS pages
- // are intended to be used when releasing slots.
- ASSERT(IsWithinPage(address));
- ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0);
-
- const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE;
- is_slot_used[index] = false;
- }
-
-private:
- bool IsWithinPage(VAddr address) const {
- return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE;
- }
-
- VAddr base_address;
- std::bitset<num_slot_entries> is_slot_used;
-};
-
ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name,
ProcessType type, KResourceLimit* res_limit) {
auto& kernel = system.Kernel();
@@ -404,7 +352,7 @@ ResultCode KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
}
// Create TLS region
- tls_region_address = CreateTLSRegion();
+ R_TRY(this->CreateThreadLocalRegion(std::addressof(tls_region_address)));
memory_reservation.Commit();
return handle_table.Initialize(capabilities.GetHandleTableSize());
@@ -444,7 +392,7 @@ void KProcess::PrepareForTermination() {
stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList());
- FreeTLSRegion(tls_region_address);
+ this->DeleteThreadLocalRegion(tls_region_address);
tls_region_address = 0;
if (resource_limit) {
@@ -456,9 +404,6 @@ void KProcess::PrepareForTermination() {
}
void KProcess::Finalize() {
- // Finalize the handle table and close any open handles.
- handle_table.Finalize();
-
// Free all shared memory infos.
{
auto it = shared_memory_list.begin();
@@ -483,67 +428,110 @@ void KProcess::Finalize() {
resource_limit = nullptr;
}
+ // Finalize the page table.
+ page_table.reset();
+
// Perform inherited finalization.
KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask>::Finalize();
}
-/**
- * Attempts to find a TLS page that contains a free slot for
- * use by a thread.
- *
- * @returns If a page with an available slot is found, then an iterator
- * pointing to the page is returned. Otherwise the end iterator
- * is returned instead.
- */
-static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
- return std::find_if(tls_pages.begin(), tls_pages.end(),
- [](const auto& page) { return page.HasAvailableSlots(); });
-}
+ResultCode KProcess::CreateThreadLocalRegion(VAddr* out) {
+ KThreadLocalPage* tlp = nullptr;
+ VAddr tlr = 0;
-VAddr KProcess::CreateTLSRegion() {
- KScopedSchedulerLock lock(kernel);
- if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
- tls_page_iter != tls_pages.cend()) {
- return *tls_page_iter->ReserveSlot();
- }
+ // See if we can get a region from a partially used TLP.
+ {
+ KScopedSchedulerLock sl{kernel};
- Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()};
- ASSERT(tls_page_ptr);
+ if (auto it = partially_used_tlp_tree.begin(); it != partially_used_tlp_tree.end()) {
+ tlr = it->Reserve();
+ ASSERT(tlr != 0);
- const VAddr start{page_table->GetKernelMapRegionStart()};
- const VAddr size{page_table->GetKernelMapRegionEnd() - start};
- const PAddr tls_map_addr{kernel.System().DeviceMemory().GetPhysicalAddr(tls_page_ptr)};
- const VAddr tls_page_addr{page_table
- ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize,
- KMemoryState::ThreadLocal,
- KMemoryPermission::UserReadWrite,
- tls_map_addr)
- .ValueOr(0)};
+ if (it->IsAllUsed()) {
+ tlp = std::addressof(*it);
+ partially_used_tlp_tree.erase(it);
+ fully_used_tlp_tree.insert(*tlp);
+ }
- ASSERT(tls_page_addr);
+ *out = tlr;
+ return ResultSuccess;
+ }
+ }
- std::memset(tls_page_ptr, 0, PageSize);
- tls_pages.emplace_back(tls_page_addr);
+ // Allocate a new page.
+ tlp = KThreadLocalPage::Allocate(kernel);
+ R_UNLESS(tlp != nullptr, ResultOutOfMemory);
+ auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(kernel, tlp); });
- const auto reserve_result{tls_pages.back().ReserveSlot()};
- ASSERT(reserve_result.has_value());
+ // Initialize the new page.
+ R_TRY(tlp->Initialize(kernel, this));
+
+ // Reserve a TLR.
+ tlr = tlp->Reserve();
+ ASSERT(tlr != 0);
+
+ // Insert into our tree.
+ {
+ KScopedSchedulerLock sl{kernel};
+ if (tlp->IsAllUsed()) {
+ fully_used_tlp_tree.insert(*tlp);
+ } else {
+ partially_used_tlp_tree.insert(*tlp);
+ }
+ }
- return *reserve_result;
+ // We succeeded!
+ tlp_guard.Cancel();
+ *out = tlr;
+ return ResultSuccess;
}
-void KProcess::FreeTLSRegion(VAddr tls_address) {
- KScopedSchedulerLock lock(kernel);
- const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
- auto iter =
- std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
- return page.GetBaseAddress() == aligned_address;
- });
+ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) {
+ KThreadLocalPage* page_to_free = nullptr;
+
+ // Release the region.
+ {
+ KScopedSchedulerLock sl{kernel};
+
+ // Try to find the page in the partially used list.
+ auto it = partially_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
+ if (it == partially_used_tlp_tree.end()) {
+ // If we don't find it, it has to be in the fully used list.
+ it = fully_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
+ R_UNLESS(it != fully_used_tlp_tree.end(), ResultInvalidAddress);
+
+ // Release the region.
+ it->Release(addr);
+
+ // Move the page out of the fully used list.
+ KThreadLocalPage* tlp = std::addressof(*it);
+ fully_used_tlp_tree.erase(it);
+ if (tlp->IsAllFree()) {
+ page_to_free = tlp;
+ } else {
+ partially_used_tlp_tree.insert(*tlp);
+ }
+ } else {
+ // Release the region.
+ it->Release(addr);
+
+ // Handle the all-free case.
+ KThreadLocalPage* tlp = std::addressof(*it);
+ if (tlp->IsAllFree()) {
+ partially_used_tlp_tree.erase(it);
+ page_to_free = tlp;
+ }
+ }
+ }
+
+ // If we should free the page it was in, do so.
+ if (page_to_free != nullptr) {
+ page_to_free->Finalize();
- // Something has gone very wrong if we're freeing a region
- // with no actual page available.
- ASSERT(iter != tls_pages.cend());
+ KThreadLocalPage::Free(kernel, page_to_free);
+ }
- iter->ReleaseSlot(tls_address);
+ return ResultSuccess;
}
void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) {
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index cf1b67428..5ed0f2d83 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -15,6 +15,7 @@
#include "core/hle/kernel/k_condition_variable.h"
#include "core/hle/kernel/k_handle_table.h"
#include "core/hle/kernel/k_synchronization_object.h"
+#include "core/hle/kernel/k_thread_local_page.h"
#include "core/hle/kernel/k_worker_task.h"
#include "core/hle/kernel/process_capability.h"
#include "core/hle/kernel/slab_helpers.h"
@@ -362,10 +363,10 @@ public:
// Thread-local storage management
// Marks the next available region as used and returns the address of the slot.
- [[nodiscard]] VAddr CreateTLSRegion();
+ [[nodiscard]] ResultCode CreateThreadLocalRegion(VAddr* out);
// Frees a used TLS slot identified by the given address
- void FreeTLSRegion(VAddr tls_address);
+ ResultCode DeleteThreadLocalRegion(VAddr addr);
private:
void PinThread(s32 core_id, KThread* thread) {
@@ -413,13 +414,6 @@ private:
/// The ideal CPU core for this process, threads are scheduled on this core by default.
u8 ideal_core = 0;
- /// The Thread Local Storage area is allocated as processes create threads,
- /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
- /// holds the TLS for a specific thread. This vector contains which parts are in use for each
- /// page as a bitmask.
- /// This vector will grow as more pages are allocated for new threads.
- std::vector<TLSPage> tls_pages;
-
/// Contains the parsed process capability descriptors.
ProcessCapabilities capabilities;
@@ -482,6 +476,12 @@ private:
KThread* exception_thread{};
KLightLock state_lock;
+
+ using TLPTree =
+ Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>;
+ using TLPIterator = TLPTree::iterator;
+ TLPTree fully_used_tlp_tree;
+ TLPTree partially_used_tlp_tree;
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 6302d5e61..2185736be 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -30,11 +30,11 @@ public:
/// Whether or not this server port has an HLE handler available.
bool HasSessionRequestHandler() const {
- return session_handler != nullptr;
+ return !session_handler.expired();
}
/// Gets the HLE handler for this port.
- SessionRequestHandlerPtr GetSessionRequestHandler() const {
+ SessionRequestHandlerWeakPtr GetSessionRequestHandler() const {
return session_handler;
}
@@ -42,7 +42,7 @@ public:
* Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
* will inherit a reference to this handler.
*/
- void SetSessionHandler(SessionRequestHandlerPtr&& handler) {
+ void SetSessionHandler(SessionRequestHandlerWeakPtr&& handler) {
session_handler = std::move(handler);
}
@@ -66,7 +66,7 @@ private:
void CleanupSessions();
SessionList session_list;
- SessionRequestHandlerPtr session_handler;
+ SessionRequestHandlerWeakPtr session_handler;
KPort* parent{};
};
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index 4d94eb9cf..30c56ff29 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -27,10 +27,7 @@ namespace Kernel {
KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {}
-KServerSession::~KServerSession() {
- // Ensure that the global list tracking server sessions does not hold on to a reference.
- kernel.UnregisterServerSession(this);
-}
+KServerSession::~KServerSession() = default;
void KServerSession::Initialize(KSession* parent_session_, std::string&& name_,
std::shared_ptr<SessionRequestManager> manager_) {
@@ -49,6 +46,9 @@ void KServerSession::Destroy() {
parent->OnServerClosed();
parent->Close();
+
+ // Release host emulation members.
+ manager.reset();
}
void KServerSession::OnClientClosed() {
@@ -98,7 +98,12 @@ ResultCode KServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& co
UNREACHABLE();
return ResultSuccess; // Ignore error if asserts are off
}
- return manager->DomainHandler(object_id - 1)->HandleSyncRequest(*this, context);
+ if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock()) {
+ return strong_ptr->HandleSyncRequest(*this, context);
+ } else {
+ UNREACHABLE();
+ return ResultSuccess;
+ }
case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h
index 05c0bec9c..5690cc757 100644
--- a/src/core/hle/kernel/k_slab_heap.h
+++ b/src/core/hle/kernel/k_slab_heap.h
@@ -16,39 +16,34 @@ class KernelCore;
namespace impl {
-class KSlabHeapImpl final {
-public:
+class KSlabHeapImpl {
YUZU_NON_COPYABLE(KSlabHeapImpl);
YUZU_NON_MOVEABLE(KSlabHeapImpl);
+public:
struct Node {
Node* next{};
};
+public:
constexpr KSlabHeapImpl() = default;
- constexpr ~KSlabHeapImpl() = default;
- void Initialize(std::size_t size) {
- ASSERT(head == nullptr);
- obj_size = size;
- }
-
- constexpr std::size_t GetObjectSize() const {
- return obj_size;
+ void Initialize() {
+ ASSERT(m_head == nullptr);
}
Node* GetHead() const {
- return head;
+ return m_head;
}
void* Allocate() {
- Node* ret = head.load();
+ Node* ret = m_head.load();
do {
if (ret == nullptr) {
break;
}
- } while (!head.compare_exchange_weak(ret, ret->next));
+ } while (!m_head.compare_exchange_weak(ret, ret->next));
return ret;
}
@@ -56,170 +51,157 @@ public:
void Free(void* obj) {
Node* node = static_cast<Node*>(obj);
- Node* cur_head = head.load();
+ Node* cur_head = m_head.load();
do {
node->next = cur_head;
- } while (!head.compare_exchange_weak(cur_head, node));
+ } while (!m_head.compare_exchange_weak(cur_head, node));
}
private:
- std::atomic<Node*> head{};
- std::size_t obj_size{};
+ std::atomic<Node*> m_head{};
};
} // namespace impl
-class KSlabHeapBase {
-public:
+template <bool SupportDynamicExpansion>
+class KSlabHeapBase : protected impl::KSlabHeapImpl {
YUZU_NON_COPYABLE(KSlabHeapBase);
YUZU_NON_MOVEABLE(KSlabHeapBase);
- constexpr KSlabHeapBase() = default;
- constexpr ~KSlabHeapBase() = default;
+private:
+ size_t m_obj_size{};
+ uintptr_t m_peak{};
+ uintptr_t m_start{};
+ uintptr_t m_end{};
- constexpr bool Contains(uintptr_t addr) const {
- return start <= addr && addr < end;
- }
+private:
+ void UpdatePeakImpl(uintptr_t obj) {
+ static_assert(std::atomic_ref<uintptr_t>::is_always_lock_free);
+ std::atomic_ref<uintptr_t> peak_ref(m_peak);
- constexpr std::size_t GetSlabHeapSize() const {
- return (end - start) / GetObjectSize();
+ const uintptr_t alloc_peak = obj + this->GetObjectSize();
+ uintptr_t cur_peak = m_peak;
+ do {
+ if (alloc_peak <= cur_peak) {
+ break;
+ }
+ } while (!peak_ref.compare_exchange_strong(cur_peak, alloc_peak));
}
- constexpr std::size_t GetObjectSize() const {
- return impl.GetObjectSize();
- }
+public:
+ constexpr KSlabHeapBase() = default;
- constexpr uintptr_t GetSlabHeapAddress() const {
- return start;
+ bool Contains(uintptr_t address) const {
+ return m_start <= address && address < m_end;
}
- std::size_t GetObjectIndexImpl(const void* obj) const {
- return (reinterpret_cast<uintptr_t>(obj) - start) / GetObjectSize();
+ void Initialize(size_t obj_size, void* memory, size_t memory_size) {
+ // Ensure we don't initialize a slab using null memory.
+ ASSERT(memory != nullptr);
+
+ // Set our object size.
+ m_obj_size = obj_size;
+
+ // Initialize the base allocator.
+ KSlabHeapImpl::Initialize();
+
+ // Set our tracking variables.
+ const size_t num_obj = (memory_size / obj_size);
+ m_start = reinterpret_cast<uintptr_t>(memory);
+ m_end = m_start + num_obj * obj_size;
+ m_peak = m_start;
+
+ // Free the objects.
+ u8* cur = reinterpret_cast<u8*>(m_end);
+
+ for (size_t i = 0; i < num_obj; i++) {
+ cur -= obj_size;
+ KSlabHeapImpl::Free(cur);
+ }
}
- std::size_t GetPeakIndex() const {
- return GetObjectIndexImpl(reinterpret_cast<const void*>(peak));
+ size_t GetSlabHeapSize() const {
+ return (m_end - m_start) / this->GetObjectSize();
}
- void* AllocateImpl() {
- return impl.Allocate();
+ size_t GetObjectSize() const {
+ return m_obj_size;
}
- void FreeImpl(void* obj) {
- // Don't allow freeing an object that wasn't allocated from this heap
- ASSERT(Contains(reinterpret_cast<uintptr_t>(obj)));
+ void* Allocate() {
+ void* obj = KSlabHeapImpl::Allocate();
- impl.Free(obj);
+ return obj;
}
- void InitializeImpl(std::size_t obj_size, void* memory, std::size_t memory_size) {
- // Ensure we don't initialize a slab using null memory
- ASSERT(memory != nullptr);
-
- // Initialize the base allocator
- impl.Initialize(obj_size);
+ void Free(void* obj) {
+ // Don't allow freeing an object that wasn't allocated from this heap.
+ const bool contained = this->Contains(reinterpret_cast<uintptr_t>(obj));
+ ASSERT(contained);
+ KSlabHeapImpl::Free(obj);
+ }
- // Set our tracking variables
- const std::size_t num_obj = (memory_size / obj_size);
- start = reinterpret_cast<uintptr_t>(memory);
- end = start + num_obj * obj_size;
- peak = start;
+ size_t GetObjectIndex(const void* obj) const {
+ if constexpr (SupportDynamicExpansion) {
+ if (!this->Contains(reinterpret_cast<uintptr_t>(obj))) {
+ return std::numeric_limits<size_t>::max();
+ }
+ }
- // Free the objects
- u8* cur = reinterpret_cast<u8*>(end);
+ return (reinterpret_cast<uintptr_t>(obj) - m_start) / this->GetObjectSize();
+ }
- for (std::size_t i{}; i < num_obj; i++) {
- cur -= obj_size;
- impl.Free(cur);
- }
+ size_t GetPeakIndex() const {
+ return this->GetObjectIndex(reinterpret_cast<const void*>(m_peak));
}
-private:
- using Impl = impl::KSlabHeapImpl;
+ uintptr_t GetSlabHeapAddress() const {
+ return m_start;
+ }
- Impl impl;
- uintptr_t peak{};
- uintptr_t start{};
- uintptr_t end{};
+ size_t GetNumRemaining() const {
+ // Only calculate the number of remaining objects under debug configuration.
+ return 0;
+ }
};
template <typename T>
-class KSlabHeap final : public KSlabHeapBase {
-public:
- enum class AllocationType {
- Host,
- Guest,
- };
+class KSlabHeap final : public KSlabHeapBase<false> {
+private:
+ using BaseHeap = KSlabHeapBase<false>;
- explicit constexpr KSlabHeap(AllocationType allocation_type_ = AllocationType::Host)
- : KSlabHeapBase(), allocation_type{allocation_type_} {}
+public:
+ constexpr KSlabHeap() = default;
- void Initialize(void* memory, std::size_t memory_size) {
- if (allocation_type == AllocationType::Guest) {
- InitializeImpl(sizeof(T), memory, memory_size);
- }
+ void Initialize(void* memory, size_t memory_size) {
+ BaseHeap::Initialize(sizeof(T), memory, memory_size);
}
T* Allocate() {
- switch (allocation_type) {
- case AllocationType::Host:
- // Fallback for cases where we do not yet support allocating guest memory from the slab
- // heap, such as for kernel memory regions.
- return new T;
-
- case AllocationType::Guest:
- T* obj = static_cast<T*>(AllocateImpl());
- if (obj != nullptr) {
- new (obj) T();
- }
- return obj;
- }
+ T* obj = static_cast<T*>(BaseHeap::Allocate());
- UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
- return nullptr;
+ if (obj != nullptr) [[likely]] {
+ std::construct_at(obj);
+ }
+ return obj;
}
- T* AllocateWithKernel(KernelCore& kernel) {
- switch (allocation_type) {
- case AllocationType::Host:
- // Fallback for cases where we do not yet support allocating guest memory from the slab
- // heap, such as for kernel memory regions.
- return new T(kernel);
+ T* Allocate(KernelCore& kernel) {
+ T* obj = static_cast<T*>(BaseHeap::Allocate());
- case AllocationType::Guest:
- T* obj = static_cast<T*>(AllocateImpl());
- if (obj != nullptr) {
- new (obj) T(kernel);
- }
- return obj;
+ if (obj != nullptr) [[likely]] {
+ std::construct_at(obj, kernel);
}
-
- UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
- return nullptr;
+ return obj;
}
void Free(T* obj) {
- switch (allocation_type) {
- case AllocationType::Host:
- // Fallback for cases where we do not yet support allocating guest memory from the slab
- // heap, such as for kernel memory regions.
- delete obj;
- return;
-
- case AllocationType::Guest:
- FreeImpl(obj);
- return;
- }
-
- UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
+ BaseHeap::Free(obj);
}
- constexpr std::size_t GetObjectIndex(const T* obj) const {
- return GetObjectIndexImpl(obj);
+ size_t GetObjectIndex(const T* obj) const {
+ return BaseHeap::GetObjectIndex(obj);
}
-
-private:
- const AllocationType allocation_type;
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index de3ffe0c7..ba7f72c6b 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -210,7 +210,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
if (owner != nullptr) {
// Setup the TLS, if needed.
if (type == ThreadType::User) {
- tls_address = owner->CreateTLSRegion();
+ R_TRY(owner->CreateThreadLocalRegion(std::addressof(tls_address)));
}
parent = owner;
@@ -305,7 +305,7 @@ void KThread::Finalize() {
// If the thread has a local region, delete it.
if (tls_address != 0) {
- parent->FreeTLSRegion(tls_address);
+ ASSERT(parent->DeleteThreadLocalRegion(tls_address).IsSuccess());
}
// Release any waiters.
@@ -326,6 +326,9 @@ void KThread::Finalize() {
}
}
+ // Release host emulation members.
+ host_context.reset();
+
// Perform inherited finalization.
KSynchronizationObject::Finalize();
}
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index d058db62c..f46db7298 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -656,7 +656,7 @@ private:
static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles));
struct ConditionVariableComparator {
- struct LightCompareType {
+ struct RedBlackKeyType {
u64 cv_key{};
s32 priority{};
@@ -672,8 +672,8 @@ private:
template <typename T>
requires(
std::same_as<T, KThread> ||
- std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs,
- const KThread& rhs) {
+ std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
+ const KThread& rhs) {
const u64 l_key = lhs.GetConditionVariableKey();
const u64 r_key = rhs.GetConditionVariableKey();
diff --git a/src/core/hle/kernel/k_thread_local_page.cpp b/src/core/hle/kernel/k_thread_local_page.cpp
new file mode 100644
index 000000000..4653c29f6
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.cpp
@@ -0,0 +1,65 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/scope_exit.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_thread_local_page.h"
+#include "core/hle/kernel/kernel.h"
+
+namespace Kernel {
+
+ResultCode KThreadLocalPage::Initialize(KernelCore& kernel, KProcess* process) {
+ // Set that this process owns us.
+ m_owner = process;
+ m_kernel = &kernel;
+
+ // Allocate a new page.
+ KPageBuffer* page_buf = KPageBuffer::Allocate(kernel);
+ R_UNLESS(page_buf != nullptr, ResultOutOfMemory);
+ auto page_buf_guard = SCOPE_GUARD({ KPageBuffer::Free(kernel, page_buf); });
+
+ // Map the address in.
+ const auto phys_addr = kernel.System().DeviceMemory().GetPhysicalAddr(page_buf);
+ R_TRY(m_owner->PageTable().MapPages(std::addressof(m_virt_addr), 1, PageSize, phys_addr,
+ KMemoryState::ThreadLocal,
+ KMemoryPermission::UserReadWrite));
+
+ // We succeeded.
+ page_buf_guard.Cancel();
+
+ return ResultSuccess;
+}
+
+ResultCode KThreadLocalPage::Finalize() {
+ // Get the physical address of the page.
+ const PAddr phys_addr = m_owner->PageTable().GetPhysicalAddr(m_virt_addr);
+ ASSERT(phys_addr);
+
+ // Unmap the page.
+ R_TRY(m_owner->PageTable().UnmapPages(this->GetAddress(), 1, KMemoryState::ThreadLocal));
+
+ // Free the page.
+ KPageBuffer::Free(*m_kernel, KPageBuffer::FromPhysicalAddress(m_kernel->System(), phys_addr));
+
+ return ResultSuccess;
+}
+
+VAddr KThreadLocalPage::Reserve() {
+ for (size_t i = 0; i < m_is_region_free.size(); i++) {
+ if (m_is_region_free[i]) {
+ m_is_region_free[i] = false;
+ return this->GetRegionAddress(i);
+ }
+ }
+
+ return 0;
+}
+
+void KThreadLocalPage::Release(VAddr addr) {
+ m_is_region_free[this->GetRegionIndex(addr)] = true;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_thread_local_page.h b/src/core/hle/kernel/k_thread_local_page.h
new file mode 100644
index 000000000..658c67e94
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.h
@@ -0,0 +1,112 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/intrusive_red_black_tree.h"
+#include "core/hle/kernel/k_page_buffer.h"
+#include "core/hle/kernel/memory_types.h"
+#include "core/hle/kernel/slab_helpers.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+class KernelCore;
+class KProcess;
+
+class KThreadLocalPage final : public Common::IntrusiveRedBlackTreeBaseNode<KThreadLocalPage>,
+ public KSlabAllocated<KThreadLocalPage> {
+public:
+ static constexpr size_t RegionsPerPage = PageSize / Svc::ThreadLocalRegionSize;
+ static_assert(RegionsPerPage > 0);
+
+public:
+ constexpr explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) {
+ m_is_region_free.fill(true);
+ }
+
+ constexpr VAddr GetAddress() const {
+ return m_virt_addr;
+ }
+
+ ResultCode Initialize(KernelCore& kernel, KProcess* process);
+ ResultCode Finalize();
+
+ VAddr Reserve();
+ void Release(VAddr addr);
+
+ bool IsAllUsed() const {
+ return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
+ [](bool is_free) { return !is_free; });
+ }
+
+ bool IsAllFree() const {
+ return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
+ [](bool is_free) { return is_free; });
+ }
+
+ bool IsAnyUsed() const {
+ return !this->IsAllFree();
+ }
+
+ bool IsAnyFree() const {
+ return !this->IsAllUsed();
+ }
+
+public:
+ using RedBlackKeyType = VAddr;
+
+ static constexpr RedBlackKeyType GetRedBlackKey(const RedBlackKeyType& v) {
+ return v;
+ }
+ static constexpr RedBlackKeyType GetRedBlackKey(const KThreadLocalPage& v) {
+ return v.GetAddress();
+ }
+
+ template <typename T>
+ requires(std::same_as<T, KThreadLocalPage> ||
+ std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
+ const KThreadLocalPage&
+ rhs) {
+ const VAddr lval = GetRedBlackKey(lhs);
+ const VAddr rval = GetRedBlackKey(rhs);
+
+ if (lval < rval) {
+ return -1;
+ } else if (lval == rval) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+private:
+ constexpr VAddr GetRegionAddress(size_t i) const {
+ return this->GetAddress() + i * Svc::ThreadLocalRegionSize;
+ }
+
+ constexpr bool Contains(VAddr addr) const {
+ return this->GetAddress() <= addr && addr < this->GetAddress() + PageSize;
+ }
+
+ constexpr size_t GetRegionIndex(VAddr addr) const {
+ ASSERT(Common::IsAligned(addr, Svc::ThreadLocalRegionSize));
+ ASSERT(this->Contains(addr));
+ return (addr - this->GetAddress()) / Svc::ThreadLocalRegionSize;
+ }
+
+private:
+ VAddr m_virt_addr{};
+ KProcess* m_owner{};
+ KernelCore* m_kernel{};
+ std::array<bool, RegionsPerPage> m_is_region_free{};
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 797f47021..f9828bc43 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -52,7 +52,7 @@ namespace Kernel {
struct KernelCore::Impl {
explicit Impl(Core::System& system_, KernelCore& kernel_)
- : time_manager{system_}, object_list_container{kernel_},
+ : time_manager{system_},
service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {}
void SetMulticore(bool is_multi) {
@@ -60,6 +60,7 @@ struct KernelCore::Impl {
}
void Initialize(KernelCore& kernel) {
+ global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
global_handle_table->Initialize(KHandleTable::MaxTableSize);
@@ -70,14 +71,13 @@ struct KernelCore::Impl {
// Derive the initial memory layout from the emulated board
Init::InitializeSlabResourceCounts(kernel);
- KMemoryLayout memory_layout;
- DeriveInitialMemoryLayout(memory_layout);
- Init::InitializeSlabHeaps(system, memory_layout);
+ DeriveInitialMemoryLayout();
+ Init::InitializeSlabHeaps(system, *memory_layout);
// Initialize kernel memory and resources.
- InitializeSystemResourceLimit(kernel, system.CoreTiming(), memory_layout);
- InitializeMemoryLayout(memory_layout);
- InitializePageSlab();
+ InitializeSystemResourceLimit(kernel, system.CoreTiming());
+ InitializeMemoryLayout();
+ Init::InitializeKPageBufferSlabHeap(system);
InitializeSchedulers();
InitializeSuspendThreads();
InitializePreemption(kernel);
@@ -108,19 +108,6 @@ struct KernelCore::Impl {
for (auto* server_port : server_ports_) {
server_port->Close();
}
- // Close all open server sessions.
- std::unordered_set<KServerSession*> server_sessions_;
- {
- std::lock_guard lk(server_sessions_lock);
- server_sessions_ = server_sessions;
- server_sessions.clear();
- }
- for (auto* server_session : server_sessions_) {
- server_session->Close();
- }
-
- // Ensure that the object list container is finalized and properly shutdown.
- object_list_container.Finalize();
// Ensures all service threads gracefully shutdown.
ClearServiceThreads();
@@ -195,11 +182,15 @@ struct KernelCore::Impl {
{
std::lock_guard lk(registered_objects_lock);
if (registered_objects.size()) {
- LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!",
- registered_objects.size());
+ LOG_DEBUG(Kernel, "{} kernel objects were dangling on shutdown!",
+ registered_objects.size());
registered_objects.clear();
}
}
+
+ // Ensure that the object list container is finalized and properly shutdown.
+ global_object_list_container->Finalize();
+ global_object_list_container.reset();
}
void InitializePhysicalCores() {
@@ -219,12 +210,11 @@ struct KernelCore::Impl {
// Creates the default system resource limit
void InitializeSystemResourceLimit(KernelCore& kernel,
- const Core::Timing::CoreTiming& core_timing,
- const KMemoryLayout& memory_layout) {
+ const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
system_resource_limit->Initialize(&core_timing);
- const auto [total_size, kernel_size] = memory_layout.GetTotalAndKernelMemorySizes();
+ const auto [total_size, kernel_size] = memory_layout->GetTotalAndKernelMemorySizes();
// If setting the default system values fails, then something seriously wrong has occurred.
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size)
@@ -293,15 +283,16 @@ struct KernelCore::Impl {
// Gets the dummy KThread for the caller, allocating a new one if this is the first time
KThread* GetHostDummyThread() {
- auto make_thread = [this]() {
- KThread* thread = KThread::Create(system.Kernel());
+ auto initialize = [this](KThread* thread) {
ASSERT(KThread::InitializeDummyThread(thread).IsSuccess());
thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId()));
return thread;
};
- thread_local KThread* saved_thread = make_thread();
- return saved_thread;
+ thread_local auto raw_thread = KThread(system.Kernel());
+ thread_local auto thread = initialize(&raw_thread);
+
+ return thread;
}
/// Registers a CPU core thread by allocating a host thread ID for it
@@ -353,16 +344,18 @@ struct KernelCore::Impl {
return schedulers[thread_id]->GetCurrentThread();
}
- void DeriveInitialMemoryLayout(KMemoryLayout& memory_layout) {
+ void DeriveInitialMemoryLayout() {
+ memory_layout = std::make_unique<KMemoryLayout>();
+
// Insert the root region for the virtual memory tree, from which all other regions will
// derive.
- memory_layout.GetVirtualMemoryRegionTree().InsertDirectly(
+ memory_layout->GetVirtualMemoryRegionTree().InsertDirectly(
KernelVirtualAddressSpaceBase,
KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1);
// Insert the root region for the physical memory tree, from which all other regions will
// derive.
- memory_layout.GetPhysicalMemoryRegionTree().InsertDirectly(
+ memory_layout->GetPhysicalMemoryRegionTree().InsertDirectly(
KernelPhysicalAddressSpaceBase,
KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1);
@@ -379,7 +372,7 @@ struct KernelCore::Impl {
if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) {
kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start;
}
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel));
// Setup the code region.
@@ -388,11 +381,11 @@ struct KernelCore::Impl {
Common::AlignDown(code_start_virt_addr, CodeRegionAlign);
constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign);
constexpr size_t code_region_size = code_region_end - code_region_start;
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
code_region_start, code_region_size, KMemoryRegionType_KernelCode));
// Setup board-specific device physical regions.
- Init::SetupDevicePhysicalMemoryRegions(memory_layout);
+ Init::SetupDevicePhysicalMemoryRegions(*memory_layout);
// Determine the amount of space needed for the misc region.
size_t misc_region_needed_size;
@@ -401,7 +394,7 @@ struct KernelCore::Impl {
misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize));
// Account for each auto-map device.
- for (const auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+ for (const auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) {
// Check that the region is valid.
ASSERT(region.GetEndAddress() != 0);
@@ -426,22 +419,22 @@ struct KernelCore::Impl {
// Setup the misc region.
const VAddr misc_region_start =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel);
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc));
// Setup the stack region.
constexpr size_t StackRegionSize = 14_MiB;
constexpr size_t StackRegionAlign = KernelAslrAlignment;
const VAddr stack_region_start =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel);
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack));
// Determine the size of the resource region.
- const size_t resource_region_size = memory_layout.GetResourceRegionSizeForInit();
+ const size_t resource_region_size = memory_layout->GetResourceRegionSizeForInit();
// Determine the size of the slab region.
const size_t slab_region_size =
@@ -458,23 +451,23 @@ struct KernelCore::Impl {
Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) -
Common::AlignDown(code_end_phys_addr, SlabRegionAlign);
const VAddr slab_region_start =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) +
(code_end_phys_addr % SlabRegionAlign);
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab));
// Setup the temp region.
constexpr size_t TempRegionSize = 128_MiB;
constexpr size_t TempRegionAlign = KernelAslrAlignment;
const VAddr temp_region_start =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel);
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
- KMemoryRegionType_KernelTemp));
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
+ KMemoryRegionType_KernelTemp));
// Automatically map in devices that have auto-map attributes.
- for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+ for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
// We only care about kernel regions.
if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) {
continue;
@@ -501,21 +494,21 @@ struct KernelCore::Impl {
const size_t map_size =
Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr;
const VAddr map_virt_addr =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize);
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice));
region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr);
}
- Init::SetupDramPhysicalMemoryRegions(memory_layout);
+ Init::SetupDramPhysicalMemoryRegions(*memory_layout);
// Insert a physical region for the kernel code region.
- ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode));
// Insert a physical region for the kernel slab region.
- ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab));
// Determine size available for kernel page table heaps, requiring > 8 MB.
@@ -524,12 +517,12 @@ struct KernelCore::Impl {
ASSERT(page_table_heap_size / 4_MiB > 2);
// Insert a physical region for the kernel page table heap region
- ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap));
// All DRAM regions that we haven't tagged by this point will be mapped under the linear
// mapping. Tag them.
- for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+ for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.GetType() == KMemoryRegionType_Dram) {
// Check that the region is valid.
ASSERT(region.GetEndAddress() != 0);
@@ -541,7 +534,7 @@ struct KernelCore::Impl {
// Get the linear region extents.
const auto linear_extents =
- memory_layout.GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
+ memory_layout->GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
KMemoryRegionAttr_LinearMapped);
ASSERT(linear_extents.GetEndAddress() != 0);
@@ -553,7 +546,7 @@ struct KernelCore::Impl {
Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) -
aligned_linear_phys_start;
const VAddr linear_region_start =
- memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
+ memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign);
const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start;
@@ -562,7 +555,7 @@ struct KernelCore::Impl {
{
PAddr cur_phys_addr = 0;
u64 cur_size = 0;
- for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+ for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) {
continue;
}
@@ -581,55 +574,49 @@ struct KernelCore::Impl {
const VAddr region_virt_addr =
region.GetAddress() + linear_region_phys_to_virt_diff;
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
region_virt_addr, region.GetSize(),
GetTypeForVirtualLinearMapping(region.GetType())));
region.SetPairAddress(region_virt_addr);
KMemoryRegion* virt_region =
- memory_layout.GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
+ memory_layout->GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
ASSERT(virt_region != nullptr);
virt_region->SetPairAddress(region.GetAddress());
}
}
// Insert regions for the initial page table region.
- ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt));
- ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(
+ ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize,
KMemoryRegionType_VirtualDramKernelInitPt));
// All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to
// some pool partition. Tag them.
- for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) {
+ for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) {
region.SetType(KMemoryRegionType_DramPoolPartition);
}
}
// Setup all other memory regions needed to arrange the pool partitions.
- Init::SetupPoolPartitionMemoryRegions(memory_layout);
+ Init::SetupPoolPartitionMemoryRegions(*memory_layout);
// Cache all linear regions in their own trees for faster access, later.
- memory_layout.InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
- linear_region_start);
+ memory_layout->InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
+ linear_region_start);
}
- void InitializeMemoryLayout(const KMemoryLayout& memory_layout) {
- const auto system_pool = memory_layout.GetKernelSystemPoolRegionPhysicalExtents();
- const auto applet_pool = memory_layout.GetKernelAppletPoolRegionPhysicalExtents();
- const auto application_pool = memory_layout.GetKernelApplicationPoolRegionPhysicalExtents();
+ void InitializeMemoryLayout() {
+ const auto system_pool = memory_layout->GetKernelSystemPoolRegionPhysicalExtents();
- // Initialize memory managers
+ // Initialize the memory manager.
memory_manager = std::make_unique<KMemoryManager>(system);
- memory_manager->InitializeManager(KMemoryManager::Pool::Application,
- application_pool.GetAddress(),
- application_pool.GetEndAddress());
- memory_manager->InitializeManager(KMemoryManager::Pool::Applet, applet_pool.GetAddress(),
- applet_pool.GetEndAddress());
- memory_manager->InitializeManager(KMemoryManager::Pool::System, system_pool.GetAddress(),
- system_pool.GetEndAddress());
+ const auto& management_region = memory_layout->GetPoolManagementRegion();
+ ASSERT(management_region.GetEndAddress() != 0);
+ memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
// Setup memory regions for emulated processes
// TODO(bunnei): These should not be hardcoded regions initialized within the kernel
@@ -666,22 +653,6 @@ struct KernelCore::Impl {
time_phys_addr, time_size, "Time:SharedMemory");
}
- void InitializePageSlab() {
- // Allocate slab heaps
- user_slab_heap_pages =
- std::make_unique<KSlabHeap<Page>>(KSlabHeap<Page>::AllocationType::Guest);
-
- // TODO(ameerj): This should be derived, not hardcoded within the kernel
- constexpr u64 user_slab_heap_size{0x3de000};
- // Reserve slab heaps
- ASSERT(
- system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
- // Initialize slab heap
- user_slab_heap_pages->Initialize(
- system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
- user_slab_heap_size);
- }
-
KClientPort* CreateNamedServicePort(std::string name) {
auto search = service_interface_factory.find(name);
if (search == service_interface_factory.end()) {
@@ -719,7 +690,6 @@ struct KernelCore::Impl {
}
std::mutex server_ports_lock;
- std::mutex server_sessions_lock;
std::mutex registered_objects_lock;
std::mutex registered_in_use_objects_lock;
@@ -743,14 +713,13 @@ struct KernelCore::Impl {
// stores all the objects in place.
std::unique_ptr<KHandleTable> global_handle_table;
- KAutoObjectWithListContainer object_list_container;
+ std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container;
/// Map of named ports managed by the kernel, which can be retrieved using
/// the ConnectToPort SVC.
std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory;
NamedPortTable named_ports;
std::unordered_set<KServerPort*> server_ports;
- std::unordered_set<KServerSession*> server_sessions;
std::unordered_set<KAutoObject*> registered_objects;
std::unordered_set<KAutoObject*> registered_in_use_objects;
@@ -762,7 +731,6 @@ struct KernelCore::Impl {
// Kernel memory management
std::unique_ptr<KMemoryManager> memory_manager;
- std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages;
// Shared memory for services
Kernel::KSharedMemory* hid_shared_mem{};
@@ -770,6 +738,9 @@ struct KernelCore::Impl {
Kernel::KSharedMemory* irs_shared_mem{};
Kernel::KSharedMemory* time_shared_mem{};
+ // Memory layout
+ std::unique_ptr<KMemoryLayout> memory_layout;
+
// Threads used for services
std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
Common::ThreadWorker service_threads_manager;
@@ -918,11 +889,11 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
}
KAutoObjectWithListContainer& KernelCore::ObjectListContainer() {
- return impl->object_list_container;
+ return *impl->global_object_list_container;
}
const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const {
- return impl->object_list_container;
+ return *impl->global_object_list_container;
}
void KernelCore::InvalidateAllInstructionCaches() {
@@ -952,16 +923,6 @@ KClientPort* KernelCore::CreateNamedServicePort(std::string name) {
return impl->CreateNamedServicePort(std::move(name));
}
-void KernelCore::RegisterServerSession(KServerSession* server_session) {
- std::lock_guard lk(impl->server_sessions_lock);
- impl->server_sessions.insert(server_session);
-}
-
-void KernelCore::UnregisterServerSession(KServerSession* server_session) {
- std::lock_guard lk(impl->server_sessions_lock);
- impl->server_sessions.erase(server_session);
-}
-
void KernelCore::RegisterKernelObject(KAutoObject* object) {
std::lock_guard lk(impl->registered_objects_lock);
impl->registered_objects.insert(object);
@@ -1034,14 +995,6 @@ const KMemoryManager& KernelCore::MemoryManager() const {
return *impl->memory_manager;
}
-KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() {
- return *impl->user_slab_heap_pages;
-}
-
-const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const {
- return *impl->user_slab_heap_pages;
-}
-
Kernel::KSharedMemory& KernelCore::GetHidSharedMem() {
return *impl->hid_shared_mem;
}
@@ -1135,6 +1088,10 @@ const KWorkerTaskManager& KernelCore::WorkerTaskManager() const {
return impl->worker_task_manager;
}
+const KMemoryLayout& KernelCore::MemoryLayout() const {
+ return *impl->memory_layout;
+}
+
bool KernelCore::IsPhantomModeForSingleCore() const {
return impl->IsPhantomModeForSingleCore();
}
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 0e04fc3bb..7087bbda6 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -41,7 +41,9 @@ class KClientSession;
class KEvent;
class KHandleTable;
class KLinkedListNode;
+class KMemoryLayout;
class KMemoryManager;
+class KPageBuffer;
class KPort;
class KProcess;
class KResourceLimit;
@@ -51,6 +53,7 @@ class KSession;
class KSharedMemory;
class KSharedMemoryInfo;
class KThread;
+class KThreadLocalPage;
class KTransferMemory;
class KWorkerTaskManager;
class KWritableEvent;
@@ -193,14 +196,6 @@ public:
/// Opens a port to a service previously registered with RegisterNamedService.
KClientPort* CreateNamedServicePort(std::string name);
- /// Registers a server session with the gobal emulation state, to be freed on shutdown. This is
- /// necessary because we do not emulate processes for HLE sessions.
- void RegisterServerSession(KServerSession* server_session);
-
- /// Unregisters a server session previously registered with RegisterServerSession when it was
- /// destroyed during the current emulation session.
- void UnregisterServerSession(KServerSession* server_session);
-
/// Registers all kernel objects with the global emulation state, this is purely for tracking
/// leaks after emulation has been shutdown.
void RegisterKernelObject(KAutoObject* object);
@@ -238,12 +233,6 @@ public:
/// Gets the virtual memory manager for the kernel.
const KMemoryManager& MemoryManager() const;
- /// Gets the slab heap allocated for user space pages.
- KSlabHeap<Page>& GetUserSlabHeapPages();
-
- /// Gets the slab heap allocated for user space pages.
- const KSlabHeap<Page>& GetUserSlabHeapPages() const;
-
/// Gets the shared memory object for HID services.
Kernel::KSharedMemory& GetHidSharedMem();
@@ -335,6 +324,10 @@ public:
return slab_heap_container->writeable_event;
} else if constexpr (std::is_same_v<T, KCodeMemory>) {
return slab_heap_container->code_memory;
+ } else if constexpr (std::is_same_v<T, KPageBuffer>) {
+ return slab_heap_container->page_buffer;
+ } else if constexpr (std::is_same_v<T, KThreadLocalPage>) {
+ return slab_heap_container->thread_local_page;
}
}
@@ -350,6 +343,9 @@ public:
/// Gets the current worker task manager, used for dispatching KThread/KProcess tasks.
const KWorkerTaskManager& WorkerTaskManager() const;
+ /// Gets the memory layout.
+ const KMemoryLayout& MemoryLayout() const;
+
private:
friend class KProcess;
friend class KThread;
@@ -393,6 +389,8 @@ private:
KSlabHeap<KTransferMemory> transfer_memory;
KSlabHeap<KWritableEvent> writeable_event;
KSlabHeap<KCodeMemory> code_memory;
+ KSlabHeap<KPageBuffer> page_buffer;
+ KSlabHeap<KThreadLocalPage> thread_local_page;
};
std::unique_ptr<SlabHeapContainer> slab_heap_container;
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp
index 4eb3a5988..52d25b837 100644
--- a/src/core/hle/kernel/service_thread.cpp
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -49,12 +49,9 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std
return;
}
+ // Allocate a dummy guest thread for this host thread.
kernel.RegisterHostThread();
- // Ensure the dummy thread allocated for this host thread is closed on exit.
- auto* dummy_thread = kernel.GetCurrentEmuThread();
- SCOPE_EXIT({ dummy_thread->Close(); });
-
while (true) {
std::function<void()> task;
diff --git a/src/core/hle/kernel/slab_helpers.h b/src/core/hle/kernel/slab_helpers.h
index f1c11256e..dc1e48fc9 100644
--- a/src/core/hle/kernel/slab_helpers.h
+++ b/src/core/hle/kernel/slab_helpers.h
@@ -59,7 +59,7 @@ class KAutoObjectWithSlabHeapAndContainer : public Base {
private:
static Derived* Allocate(KernelCore& kernel) {
- return kernel.SlabHeap<Derived>().AllocateWithKernel(kernel);
+ return kernel.SlabHeap<Derived>().Allocate(kernel);
}
static void Free(KernelCore& kernel, Derived* obj) {
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 365e22e4e..b2e9ec092 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -96,4 +96,6 @@ constexpr inline s32 IdealCoreNoUpdate = -3;
constexpr inline s32 LowestThreadPriority = 63;
constexpr inline s32 HighestThreadPriority = 0;
+constexpr inline size_t ThreadLocalRegionSize = 0x200;
+
} // namespace Kernel::Svc
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 2f8e21568..420de3c54 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -980,7 +980,7 @@ private:
LOG_DEBUG(Service_AM, "called");
IPC::RequestParser rp{ctx};
- applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>());
+ applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>().lock());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
@@ -1007,7 +1007,7 @@ private:
LOG_DEBUG(Service_AM, "called");
IPC::RequestParser rp{ctx};
- applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>());
+ applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>().lock());
ASSERT(applet->IsInitialized());
applet->ExecuteInteractive();
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
index b8c2c6e51..ff0bbb788 100644
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -17,21 +17,12 @@ namespace Service::KernelHelpers {
ServiceContext::ServiceContext(Core::System& system_, std::string name_)
: kernel(system_.Kernel()) {
-
- // Create a resource limit for the process.
- const auto physical_memory_size =
- kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::System);
- auto* resource_limit = Kernel::CreateResourceLimitForProcess(system_, physical_memory_size);
-
// Create the process.
process = Kernel::KProcess::Create(kernel);
ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_),
Kernel::KProcess::ProcessType::KernelInternal,
- resource_limit)
+ kernel.GetSystemResourceLimit())
.IsSuccess());
-
- // Close reference to our resource limit, as the process opens one.
- resource_limit->Close();
}
ServiceContext::~ServiceContext() {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9fc7bb1b1..099276420 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -288,7 +288,7 @@ public:
}
bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
- constexpr std::size_t padding_size{4 * Kernel::PageSize};
+ const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize};
const auto start_info{page_table.QueryInfo(start - 1)};
if (start_info.state != Kernel::KMemoryState::Free) {
@@ -308,31 +308,69 @@ public:
return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
}
- VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const {
- VAddr addr{};
- const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >>
- Kernel::PageBits};
- do {
- addr = page_table.GetAliasCodeRegionStart() +
- (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits);
- } while (!page_table.IsInsideAddressSpace(addr, size) ||
- page_table.IsInsideHeapRegion(addr, size) ||
- page_table.IsInsideAliasRegion(addr, size));
- return addr;
+ ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) {
+ size = Common::AlignUp(size, Kernel::PageSize);
+ size += page_table.GetNumGuardPages() * Kernel::PageSize * 4;
+
+ const auto is_region_available = [&](VAddr addr) {
+ const auto end_addr = addr + size;
+ while (addr < end_addr) {
+ if (system.Memory().IsValidVirtualAddress(addr)) {
+ return false;
+ }
+
+ if (!page_table.IsInsideAddressSpace(out_addr, size)) {
+ return false;
+ }
+
+ if (page_table.IsInsideHeapRegion(out_addr, size)) {
+ return false;
+ }
+
+ if (page_table.IsInsideAliasRegion(out_addr, size)) {
+ return false;
+ }
+
+ addr += Kernel::PageSize;
+ }
+ return true;
+ };
+
+ bool succeeded = false;
+ const auto map_region_end =
+ page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize();
+ while (current_map_addr < map_region_end) {
+ if (is_region_available(current_map_addr)) {
+ succeeded = true;
+ break;
+ }
+ current_map_addr += 0x100000;
+ }
+
+ if (!succeeded) {
+ UNREACHABLE_MSG("Out of address space!");
+ return Kernel::ResultOutOfMemory;
+ }
+
+ out_addr = current_map_addr;
+ current_map_addr += size;
+
+ return ResultSuccess;
}
- ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress,
- u64 size) const {
+ ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) {
+ auto& page_table{process->PageTable()};
+ VAddr addr{};
+
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
- auto& page_table{process->PageTable()};
- const VAddr addr{GetRandomMapRegion(page_table, size)};
- const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)};
+ R_TRY(GetAvailableMapRegion(page_table, size, addr));
+ const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)};
if (result == Kernel::ResultInvalidCurrentMemory) {
continue;
}
- CASCADE_CODE(result);
+ R_TRY(result);
if (ValidateRegionForMap(page_table, addr, size)) {
return addr;
@@ -343,7 +381,7 @@ public:
}
ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size,
- VAddr bss_addr, std::size_t bss_size, std::size_t size) const {
+ VAddr bss_addr, std::size_t bss_size, std::size_t size) {
for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
auto& page_table{process->PageTable()};
VAddr addr{};
@@ -597,6 +635,7 @@ public:
LOG_WARNING(Service_LDR, "(STUBBED) called");
initialized = true;
+ current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
@@ -607,6 +646,7 @@ private:
std::map<VAddr, NROInfo> nro;
std::map<VAddr, std::vector<SHA256Hash>> nrr;
+ VAddr current_map_addr{};
bool IsValidNROHash(const SHA256Hash& hash) const {
return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index eaa172595..695a1faa6 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -81,6 +81,8 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name
}
auto* port = Kernel::KPort::Create(kernel);
+ SCOPE_EXIT({ port->Close(); });
+
port->Initialize(ServerSessionCountMax, false, name);
auto handler = it->second;
port->GetServerPort().SetSessionHandler(std::move(handler));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
struct RescalingLayout {
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
- alignas(16) u32 down_factor;
+ u32 down_factor;
};
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
- IR::U32 r{ir.Imm32(0)};
- const IR::U32 not_a{ir.BitwiseNot(a)};
- const IR::U32 not_b{ir.BitwiseNot(b)};
- const IR::U32 not_c{ir.BitwiseNot(c)};
- if (ttbl & 0x01) {
- // r |= ~a & ~b & ~c;
- const auto lhs{ir.BitwiseAnd(not_a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
+ switch (ttbl) {
+ // generated code, do not edit manually
+ case 0:
+ return ir.Imm32(0);
+ case 1:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
+ case 2:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 3:
+ return ir.BitwiseNot(ir.BitwiseOr(a, b));
+ case 4:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 5:
+ return ir.BitwiseNot(ir.BitwiseOr(a, c));
+ case 6:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 7:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
+ case 8:
+ return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 9:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
+ case 10:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(a));
+ case 11:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 12:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(a));
+ case 13:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 14:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 15:
+ return ir.BitwiseNot(a);
+ case 16:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 17:
+ return ir.BitwiseNot(ir.BitwiseOr(b, c));
+ case 18:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+ case 19:
+ return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
+ case 20:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+ case 21:
+ return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 22:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 23:
+ return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
+ ir.BitwiseNot(a));
+ case 24:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+ case 25:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
+ case 26:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 27:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+ case 28:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 29:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+ case 30:
+ return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
+ case 31:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
+ case 32:
+ return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 33:
+ return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+ case 34:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(b));
+ case 35:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 36:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
+ case 37:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
+ case 38:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 39:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 40:
+ return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
+ case 41:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b),
+ ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
+ case 42:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+ case 43:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
+ ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+ case 44:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
+ case 45:
+ return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 46:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
+ case 47:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
+ case 48:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(b));
+ case 49:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 50:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 51:
+ return ir.BitwiseNot(b);
+ case 52:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 53:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 54:
+ return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
+ case 55:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
+ case 56:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
+ case 57:
+ return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 58:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
+ case 59:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
+ case 60:
+ return ir.BitwiseXor(a, b);
+ case 61:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
+ case 62:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
+ case 63:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, b));
+ case 64:
+ return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 65:
+ return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 66:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
+ case 67:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
+ case 68:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(c));
+ case 69:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 70:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 71:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 72:
+ return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
+ case 73:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c),
+ ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
+ case 74:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
+ case 75:
+ return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 76:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+ case 77:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
+ ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 78:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
+ case 79:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
+ case 80:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(c));
+ case 81:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 82:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 83:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 84:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 85:
+ return ir.BitwiseNot(c);
+ case 86:
+ return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
+ case 87:
+ return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
+ case 88:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
+ case 89:
+ return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 90:
+ return ir.BitwiseXor(a, c);
+ case 91:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
+ case 92:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
+ case 93:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
+ case 94:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
+ case 95:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, c));
+ case 96:
+ return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
+ case 97:
+ return ir.BitwiseXor(ir.BitwiseOr(b, c),
+ ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
+ case 98:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
+ case 99:
+ return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 100:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
+ case 101:
+ return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 102:
+ return ir.BitwiseXor(b, c);
+ case 103:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
+ case 104:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
+ case 105:
+ return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 106:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
+ case 107:
+ return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
+ ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 108:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
+ case 109:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
+ ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 110:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 111:
+ return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 112:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+ case 113:
+ return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
+ ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 114:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
+ case 115:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
+ case 116:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
+ case 117:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
+ case 118:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+ case 119:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, c));
+ case 120:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
+ case 121:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
+ ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 122:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 123:
+ return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+ case 124:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 125:
+ return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+ case 126:
+ return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+ case 127:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
+ case 128:
+ return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
+ case 129:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 130:
+ return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 131:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 132:
+ return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 133:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 134:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 135:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 136:
+ return ir.BitwiseAnd(b, c);
+ case 137:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 138:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 139:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 140:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 141:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 142:
+ return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 143:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 144:
+ return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 145:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 146:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 147:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 148:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 149:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 150:
+ return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
+ case 151:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
+ ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 152:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 153:
+ return ir.BitwiseXor(b, ir.BitwiseNot(c));
+ case 154:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+ case 155:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
+ case 156:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+ case 157:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
+ case 158:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
+ case 159:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
+ case 160:
+ return ir.BitwiseAnd(a, c);
+ case 161:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 162:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 163:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 164:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 165:
+ return ir.BitwiseXor(a, ir.BitwiseNot(c));
+ case 166:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+ case 167:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
+ case 168:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
+ case 169:
+ return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 170:
+ return c;
+ case 171:
+ return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 172:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 173:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 174:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+ case 175:
+ return ir.BitwiseOr(c, ir.BitwiseNot(a));
+ case 176:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 177:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 178:
+ return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 179:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 180:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+ case 181:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
+ case 182:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
+ case 183:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
+ case 184:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 185:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 186:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+ case 187:
+ return ir.BitwiseOr(c, ir.BitwiseNot(b));
+ case 188:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
+ case 189:
+ return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 190:
+ return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
+ case 191:
+ return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+ case 192:
+ return ir.BitwiseAnd(a, b);
+ case 193:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 194:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 195:
+ return ir.BitwiseXor(a, ir.BitwiseNot(b));
+ case 196:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 197:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 198:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+ case 199:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
+ case 200:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
+ case 201:
+ return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 202:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 203:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 204:
+ return b;
+ case 205:
+ return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 206:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+ case 207:
+ return ir.BitwiseOr(b, ir.BitwiseNot(a));
+ case 208:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 209:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 210:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+ case 211:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
+ case 212:
+ return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 213:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 214:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
+ case 215:
+ return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
+ case 216:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 217:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 218:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
+ case 219:
+ return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 220:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+ case 221:
+ return ir.BitwiseOr(b, ir.BitwiseNot(c));
+ case 222:
+ return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
+ case 223:
+ return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+ case 224:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
+ case 225:
+ return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 226:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+ case 227:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 228:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+ case 229:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 230:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
+ case 231:
+ return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+ case 232:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 233:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b),
+ ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
+ case 234:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
+ case 235:
+ return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 236:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
+ case 237:
+ return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 238:
+ return ir.BitwiseOr(b, c);
+ case 239:
+ return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 240:
+ return a;
+ case 241:
+ return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 242:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+ case 243:
+ return ir.BitwiseOr(a, ir.BitwiseNot(b));
+ case 244:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+ case 245:
+ return ir.BitwiseOr(a, ir.BitwiseNot(c));
+ case 246:
+ return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
+ case 247:
+ return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+ case 248:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
+ case 249:
+ return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 250:
+ return ir.BitwiseOr(a, c);
+ case 251:
+ return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 252:
+ return ir.BitwiseOr(a, b);
+ case 253:
+ return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 254:
+ return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
+ case 255:
+ return ir.Imm32(0xFFFFFFFF);
+ // end of generated code
}
- if (ttbl & 0x02) {
- // r |= ~a & ~b & c;
- const auto lhs{ir.BitwiseAnd(not_a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x04) {
- // r |= ~a & b & ~c;
- const auto lhs{ir.BitwiseAnd(not_a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x08) {
- // r |= ~a & b & c;
- const auto lhs{ir.BitwiseAnd(not_a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x10) {
- // r |= a & ~b & ~c;
- const auto lhs{ir.BitwiseAnd(a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x20) {
- // r |= a & ~b & c;
- const auto lhs{ir.BitwiseAnd(a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x40) {
- // r |= a & b & ~c;
- const auto lhs{ir.BitwiseAnd(a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x80) {
- // r |= a & b & c;
- const auto lhs{ir.BitwiseAnd(a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- return r;
+ throw NotImplementedException("LOP3 with out of range ttbl");
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
+# Copyright © 2022 degasus <markus@selfnet.de>
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+
+from itertools import product
+
+# The primitive instructions
+OPS = {
+ 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
+ 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
+ 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
+ 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions
+}
+
+# Our database of combination of instructions
+optimized_calls = {}
+def cmp(lhs, rhs):
+ if lhs is None: # new entry
+ return True
+ if lhs[3] > rhs[3]: # costs
+ return True
+ if lhs[3] < rhs[3]: # costs
+ return False
+ if len(lhs[0]) > len(rhs[0]): # string len
+ return True
+ if len(lhs[0]) < len(rhs[0]): # string len
+ return False
+ if lhs[0] > rhs[0]: # string sorting
+ return True
+ if lhs[0] < rhs[0]: # string sorting
+ return False
+ assert lhs == rhs, "redundant instruction, bug in brute force"
+ return False
+def register(imm, instruction, count, latency):
+ # Use the sum of instruction count and latency as costs to evaluate which combination is best
+ costs = count + latency
+
+ old = optimized_calls.get(imm, None)
+ new = (instruction, count, latency, costs)
+
+ # Update if new or better
+ if cmp(old, new):
+ optimized_calls[imm] = new
+ return True
+
+ return False
+
+# Constants: 0, 1 (for free)
+register(0, 'ir.Imm32(0)', 0, 0)
+register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
+
+# Inputs: a, b, c (for free)
+ta = 0xF0
+tb = 0xCC
+tc = 0xAA
+inputs = {
+ ta : 'a',
+ tb : 'b',
+ tc : 'c',
+}
+for imm, instruction in inputs.items():
+ register(imm, instruction, 0, 0)
+ register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
+
+# Try to combine two values from the db with an instruction.
+# If it is better than the old method, update it.
+while True:
+ registered = 0
+ calls_copy = optimized_calls.copy()
+ for OP, (argc, cost, f) in OPS.items():
+ for args in product(calls_copy.items(), repeat=argc):
+ # unpack(transponse) the arrays
+ imm = [arg[0] for arg in args]
+ value = [arg[1][0] for arg in args]
+ count = [arg[1][1] for arg in args]
+ latency = [arg[1][2] for arg in args]
+
+ registered += register(
+ f(*imm),
+ OP.format(*value),
+ sum(count) + cost,
+ max(latency) + cost)
+ if registered == 0:
+ # No update at all? So terminate
+ break
+
+# Hacky output. Please improve me to output valid C++ instead.
+s = """ case {imm}:
+ return {op};"""
+for imm in range(256):
+ print(s.format(imm=imm, op=optimized_calls[imm][0]))
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 248ad3ced..b22725584 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
+ Optimization::ConstantPropagationPass(program);
+
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::TexturePass(env, program);
- Optimization::ConstantPropagationPass(program);
-
if (Settings::values.resolution_info.active) {
Optimization::RescalingPass(program);
}
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 38592afd0..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
- if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+ if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
+ inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
return std::nullopt;
}
const IR::Value index{inst->Arg(0)};
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
}
}
+void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
+ size_t index) {
+ const IR::Value composite{inst.Arg(index)};
+ if (composite.IsEmpty()) {
+ return;
+ }
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
+ const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
+ switch (info.type) {
+ case TextureType::ColorArray2D:
+ case TextureType::Color2D:
+ inst.SetArg(index, ir.CompositeConstruct(x, y));
+ break;
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ case TextureType::ColorArrayCube:
+ case TextureType::Buffer:
+ // Nothing to patch here
+ break;
+ }
+}
+
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
SubScaleCoord(ir, inst, is_scaled);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
ScaleIntegerComposite(ir, inst, is_scaled, 1);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..54a902f56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- return StartCBData(method);
+ return ProcessCBData(argument);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index):
+ regs.index_array.count = regs.small_index.count;
+ regs.index_array.first = regs.small_index.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(topology_override):
+ use_topology_override = true;
+ return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- if (method == cb_data_state.current) {
- regs.reg_array[method] = method_argument;
- ProcessCBData(method_argument);
- return;
- } else if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
-
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- ProcessCBMultiData(method, base_start, amount);
+ ProcessCBMultiData(base_start, amount);
break;
default:
for (std::size_t i = 0; i < amount; i++) {
@@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
+void Maxwell3D::ProcessTopologyOverride() {
+ using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+ using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+ PrimitiveTopology topology{};
+
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ topology = regs.draw.topology;
+ break;
+ case PrimitiveTopologyOverride::Points:
+ topology = PrimitiveTopology::Points;
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ break;
+ }
+
+ if (use_topology_override) {
+ regs.draw.topology.Assign(topology);
+ }
+}
+
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
@@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
-void Maxwell3D::ProcessCBData(u32 value) {
- const u32 id = cb_data_state.id;
- cb_data_state.buffer[id][cb_data_state.counter] = value;
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
- cb_data_state.counter++;
-}
-
-void Maxwell3D::StartCBData(u32 method) {
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
-}
-
-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
- if (cb_data_state.current != method) {
- if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- }
- const std::size_t id = cb_data_state.id;
- const std::size_t size = amount;
- std::size_t i = 0;
- for (; i < size; i++) {
- cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
- cb_data_state.counter++;
- }
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
-void Maxwell3D::FinishCBData() {
+void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
@@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- const GPUVAddr address{buffer_address + cb_data_state.start_pos};
- const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
+ const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ const size_t copy_size = amount * sizeof(u32);
+ memory_manager.WriteBlock(address, start_base, copy_size);
- const u32 id = cb_data_state.id;
- memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
+}
- cb_data_state.id = null_cb_data;
- cb_data_state.current = null_cb_data;
+void Maxwell3D::ProcessCBData(u32 value) {
+ ProcessCBMultiData(&value, 1);
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..357a74c70 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -367,6 +367,22 @@ public:
Patches = 0xe,
};
+ // Constants as from NVC0_3D_UNK1970_D3D
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+ enum class PrimitiveTopologyOverride : u32 {
+ None = 0x0,
+ Points = 0x1,
+ Lines = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS_NOINIT(0x7);
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index;
+
+ INSERT_PADDING_WORDS_NOINIT(0x6);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1265,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_PADDING_WORDS_NOINIT(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ PrimitiveTopologyOverride topology_override;
+
+ INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1520,10 +1545,8 @@ private:
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
- void StartCBData(u32 method);
void ProcessCBData(u32 value);
- void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
- void FinishCBData();
+ void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1554,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
+ /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+ void ProcessTopologyOverride();
+
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1555,20 +1581,10 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct CBDataState {
- static constexpr size_t inline_size = 0x4000;
- std::array<std::array<u32, inline_size>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- };
- CBDataState cb_data_state;
-
Upload::State upload_state;
bool execute_on{true};
+ bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::ReleaseSemaphore() {
+ const auto type = regs.launch_dma.semaphore_type;
+ const GPUVAddr address = regs.semaphore.address;
+ switch (type) {
+ case LaunchDMA::SemaphoreType::NONE:
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
+ memory_manager.Write<u32>(address, regs.semaphore.payload);
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
+ memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
+ memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
+ break;
+ default:
+ UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ }
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..2692cac8a 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -224,6 +224,8 @@ private:
void FastCopyBlockLinearToPitch();
+ void ReleaseSemaphore();
+
Core::System& system;
MemoryManager& memory_manager;
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 151290101..293ad7d59 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const {
return true;
}
ASSERT(sync_object.handle != 0);
- GLsizei length;
GLint sync_status;
- glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+ glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
return sync_status == GL_SIGNALED;
}
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..9e6732abd 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
- if (in_parallel) {
- // Make sure program is built before continuing when building in parallel
- glGetString(GL_PROGRAM_ERROR_STRING_NV);
- }
}
break;
case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
break;
}
}
- if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
- // Make sure programs have built if we are building shaders in parallel
- for (OGLProgram& program : source_programs) {
- if (program.handle != 0) {
- GLint status{};
- glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
- }
- }
+ if (in_parallel) {
+ std::lock_guard lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
- is_built = true;
- built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
- if (!is_built.load(std::memory_order::relaxed)) {
+ if (!IsBuilt()) {
WaitForBuild();
}
const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
void GraphicsPipeline::WaitForBuild() {
- std::unique_lock lock{built_mutex};
- built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
+bool GraphicsPipeline::IsBuilt() noexcept {
+ if (is_built) {
+ return true;
+ }
+ if (built_fence.handle == 0) {
+ return false;
+ }
+ // Timeout of zero means this is non-blocking
+ const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ is_built = sync_status != GL_TIMEOUT_EXPIRED;
+ return is_built;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..311d49f3f 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -100,9 +100,7 @@ public:
return writes_global_memory;
}
- [[nodiscard]] bool IsBuilt() const noexcept {
- return is_built.load(std::memory_order::relaxed);
- }
+ [[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
@@ -154,7 +152,8 @@ private:
std::mutex built_mutex;
std::condition_variable built_condvar;
- std::atomic_bool is_built{false};
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..4d73427b4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cstring>
#include <memory>
#include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants{base_vertex, index_shift};
+ const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0f62779de..ca6019a3a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
}
break;
case PixelFormat::A8B8G8R8_UNORM:
- if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
+ if (src_view.format == PixelFormat::S8_UINT_D24_UNORM ||
+ src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
break;
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..2f2594585 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
+ scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index f915bd856..4b943c6ba 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
ReadBasicSetting(Settings::values.cpu_debug_mode);
@@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
ReadBasicSetting(Settings::values.cpuopt_misc_ir);
ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
ReadBasicSetting(Settings::values.cpuopt_fastmem);
+ ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
+ ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();
@@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
WriteBasicSetting(Settings::values.cpu_debug_mode);
@@ -1151,6 +1155,8 @@ void Config::SaveCpuValues() {
WriteBasicSetting(Settings::values.cpuopt_misc_ir);
WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
WriteBasicSetting(Settings::values.cpuopt_fastmem);
+ WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
+ WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index f66cab5d4..bf74ccc7c 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
+ ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
@@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
+ ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
+ Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
ui->cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
+ ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
+ ui->cpuopt_unsafe_ignore_global_monitor,
+ cpuopt_unsafe_ignore_global_monitor);
}
void ConfigureCpu::changeEvent(QEvent* event) {
@@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
Settings::values.cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
+ ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
+ Settings::values.cpuopt_unsafe_ignore_global_monitor,
+ cpuopt_unsafe_ignore_global_monitor);
}
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index ed9af0e9f..733e38be4 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -45,6 +45,7 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
+ ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
const Core::System& system;
};
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index d8064db24..5d80a8c91 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -150,6 +150,18 @@
</property>
</widget>
</item>
+ <item>
+ <widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
+ <property name="toolTip">
+ <string>
+ &lt;div&gt;This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.&lt;/div&gt;
+ </string>
+ </property>
+ <property name="text">
+ <string>Ignore global monitor</string>
+ </property>
+ </widget>
+ </item>
</layout>
</widget>
</item>
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp
index 05a90963d..616a0be75 100644
--- a/src/yuzu/configuration/configure_cpu_debug.cpp
+++ b/src/yuzu/configuration/configure_cpu_debug.cpp
@@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
Settings::values.cpuopt_reduce_misalign_checks.GetValue());
ui->cpuopt_fastmem->setEnabled(runtime_lock);
ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
+ ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
+ ui->cpuopt_fastmem_exclusives->setChecked(
+ Settings::values.cpuopt_fastmem_exclusives.GetValue());
+ ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
+ ui->cpuopt_recompile_exclusives->setChecked(
+ Settings::values.cpuopt_recompile_exclusives.GetValue());
}
void ConfigureCpuDebug::ApplyConfiguration() {
@@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
+ Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
+ Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
}
void ConfigureCpuDebug::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui
index 6e635bb2f..2bc268810 100644
--- a/src/yuzu/configuration/configure_cpu_debug.ui
+++ b/src/yuzu/configuration/configure_cpu_debug.ui
@@ -144,7 +144,34 @@
</string>
</property>
<property name="text">
- <string>Enable Host MMU Emulation</string>
+ <string>Enable Host MMU Emulation (general memory instructions)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
+ <property name="toolTip">
+ <string>
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all exclusive memory accesses to use Software MMU Emulation.&lt;/div&gt;
+ </string>
+ </property>
+ <property name="text">
+ <string>Enable Host MMU Emulation (exclusive memory instructions)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QCheckBox" name="cpuopt_recompile_exclusives">
+ <property name="toolTip">
+ <string>
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.&lt;/div&gt;
+ </string>
+ </property>
+ <property name="text">
+ <string>Enable recompilation of exclusive memory instructions</string>
</property>
</widget>
</item>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index d573829be..06774768d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -250,9 +250,9 @@ GMainWindow::GMainWindow()
#ifdef ARCHITECTURE_x86_64
const auto& caps = Common::GetCPUCaps();
std::string cpu_string = caps.cpu_string;
- if (caps.avx || caps.avx2 || caps.avx512) {
+ if (caps.avx || caps.avx2 || caps.avx512f) {
cpu_string += " | AVX";
- if (caps.avx512) {
+ if (caps.avx512f) {
cpu_string += "512";
} else if (caps.avx2) {
cpu_string += '2';
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 30963a8bb..b74411c84 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -280,11 +280,14 @@ void Config::ReadValues() {
ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
+ ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
+ ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6d613bf7a..34782c378 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -124,7 +124,11 @@ keyboard_enabled =
[Core]
# Whether to use multi-core for CPU emulation
# 0: Disabled, 1 (default): Enabled
-use_multi_core=
+use_multi_core =
+
+# Enable extended guest system memory layout (6GB DRAM)
+# 0 (default): Disabled, 1: Enabled
+use_extended_memory_layout =
[Cpu]
# Adjusts various optimizations.
@@ -174,6 +178,14 @@ cpuopt_reduce_misalign_checks =
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem =
+# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
+# 0: Disabled, 1 (default): Enabled
+cpuopt_fastmem_exclusives =
+
+# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access)
+# 0: Disabled, 1 (default): Enabled
+cpuopt_recompile_exclusives =
+
# Enable unfuse FMA (improve performance on CPUs without FMA)
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
# 0: Disabled, 1 (default): Enabled
@@ -199,6 +211,11 @@ cpuopt_unsafe_inaccurate_nan =
# 0: Disabled, 1 (default): Enabled
cpuopt_unsafe_fastmem_check =
+# Enable faster exclusive instructions
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_ignore_global_monitor =
+
[Renderer]
# Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan