From 16784e5bb3dd043f9430401097a4be42ad21eb91 Mon Sep 17 00:00:00 2001 From: merry Date: Sun, 27 Feb 2022 19:40:05 +0000 Subject: dynarmic: Inline exclusive memory accesses Inlines implementation of exclusive instructions into JITted code, improving performance of applications relying heavily on these instructions. We also fastmem these instructions for additional speed, with support for appropriate recompilation on fastmem failure. An unsafe optimization to disable the intercore global_monitor is also provided, should one wish to rely solely on cmpxchg semantics for safety. See also: merryhime/dynarmic#664 --- externals/dynarmic | 2 +- src/common/settings.cpp | 1 + src/common/settings.h | 3 +++ src/core/arm/dynarmic/arm_dynarmic_32.cpp | 12 ++++++++++ src/core/arm/dynarmic/arm_dynarmic_64.cpp | 13 +++++++++++ src/core/arm/dynarmic/arm_exclusive_monitor.cpp | 4 ++-- src/core/arm/dynarmic/arm_exclusive_monitor.h | 2 +- src/core/arm/exclusive_monitor.h | 2 +- src/core/hle/kernel/k_address_arbiter.cpp | 4 ++-- src/yuzu/configuration/config.cpp | 4 ++++ src/yuzu/configuration/configure_cpu.cpp | 9 ++++++++ src/yuzu/configuration/configure_cpu.h | 1 + src/yuzu/configuration/configure_cpu.ui | 12 ++++++++++ src/yuzu/configuration/configure_cpu_debug.cpp | 8 +++++++ src/yuzu/configuration/configure_cpu_debug.ui | 29 ++++++++++++++++++++++++- src/yuzu_cmd/config.cpp | 3 +++ src/yuzu_cmd/default_ini.h | 13 +++++++++++ 17 files changed, 114 insertions(+), 8 deletions(-) diff --git a/externals/dynarmic b/externals/dynarmic index 19a423034..f9696760d 160000 --- a/externals/dynarmic +++ b/externals/dynarmic @@ -1 +1 @@ -Subproject commit 19a423034e1abcaf1a61fa61ceffffebf45a0240 +Subproject commit f9696760db4f63a413093dedd185875da64dff58 diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 2810cec15..877e0faa4 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_fastmem_check.SetGlobal(true); + values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true); // Renderer values.renderer_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index d06b23a14..a37d83fb3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -484,12 +484,15 @@ struct Values { BasicSetting cpuopt_misc_ir{true, "cpuopt_misc_ir"}; BasicSetting cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"}; BasicSetting cpuopt_fastmem{true, "cpuopt_fastmem"}; + BasicSetting cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"}; + BasicSetting cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"}; Setting cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"}; Setting cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"}; Setting cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"}; Setting cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"}; Setting cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"}; + Setting cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"}; // Renderer RangedSetting renderer_backend{ diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b0d89c539..286976623 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -137,6 +137,8 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.only_detect_misalignment_via_page_table_on_page_boundary = true; + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; // Multi-process state config.processor_id = core_index; @@ -178,6 +180,12 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -195,6 +203,9 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_inaccurate_nan) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -203,6 +214,7 @@ std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_unique(config); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 56836bd05..d96226c41 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -185,6 +185,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.fastmem_pointer = page_table->fastmem_arena; config.fastmem_address_space_bits = address_space_bits; config.silently_mirror_fastmem = false; + + config.fastmem_exclusive_access = true; + config.recompile_on_exclusive_fastmem_failure = true; } // Multi-process state @@ -237,6 +240,12 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (!Settings::values.cpuopt_fastmem) { config.fastmem_pointer = nullptr; } + if (!Settings::values.cpuopt_fastmem_exclusives) { + config.fastmem_exclusive_access = false; + } + if (!Settings::values.cpuopt_recompile_exclusives) { + config.recompile_on_exclusive_fastmem_failure = false; + } } // Unsafe optimizations @@ -254,6 +263,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_fastmem_check) { config.fastmem_address_space_bits = 64; } + if (Settings::values.cpuopt_unsafe_ignore_global_monitor) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; + } } // Curated optimizations @@ -262,6 +274,7 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable* config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; config.fastmem_address_space_bits = 64; + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; } return std::make_shared(config); diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp index 397d054a8..ea6b224e0 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad }); } -void DynarmicExclusiveMonitor::ClearExclusive() { - monitor.Clear(); +void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) { + monitor.ClearProcessor(core_index); } bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h index 265c4ecef..5a15b43ef 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.h +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -29,7 +29,7 @@ public: u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; - void ClearExclusive() override; + void ClearExclusive(std::size_t core_index) override; bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index 62f6e6023..9914ca3da 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -23,7 +23,7 @@ public: virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; - virtual void ClearExclusive() = 0; + virtual void ClearExclusive(std::size_t core_index) = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0; diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 783c69858..1d1f5e5f8 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu } } else { // Otherwise, clear our exclusive hold and finish - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. @@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 } } else { // Otherwise, clear our exclusive hold and finish. - monitor.ClearExclusive(); + monitor.ClearExclusive(current_core); } // We're done. diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index f915bd856..c2b66ff14 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -609,6 +609,7 @@ void Config::ReadCpuValues() { ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { ReadBasicSetting(Settings::values.cpu_debug_mode); @@ -621,6 +622,8 @@ void Config::ReadCpuValues() { ReadBasicSetting(Settings::values.cpuopt_misc_ir); ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); ReadBasicSetting(Settings::values.cpuopt_fastmem); + ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives); + ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives); } qt_config->endGroup(); @@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() { WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); + WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor); if (global) { WriteBasicSetting(Settings::values.cpu_debug_mode); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index f66cab5d4..bf74ccc7c 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() { ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); + ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_reduce_fp_error->setChecked( @@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() { Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); ui->cpuopt_unsafe_fastmem_check->setChecked( Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); + ui->cpuopt_unsafe_ignore_global_monitor->setChecked( + Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->accuracy->setCurrentIndex(static_cast(Settings::values.cpu_accuracy.GetValue())); @@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, ui->cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor, + ui->cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } void ConfigureCpu::changeEvent(QEvent* event) { @@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, Settings::values.cpuopt_unsafe_fastmem_check, cpuopt_unsafe_fastmem_check); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor, + Settings::values.cpuopt_unsafe_ignore_global_monitor, + cpuopt_unsafe_ignore_global_monitor); } diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index ed9af0e9f..733e38be4 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -45,6 +45,7 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; + ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index d8064db24..5d80a8c91 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -150,6 +150,18 @@ + + + + + <div>This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.</div> + + + + Ignore global monitor + + + diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp index 05a90963d..616a0be75 100644 --- a/src/yuzu/configuration/configure_cpu_debug.cpp +++ b/src/yuzu/configuration/configure_cpu_debug.cpp @@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() { Settings::values.cpuopt_reduce_misalign_checks.GetValue()); ui->cpuopt_fastmem->setEnabled(runtime_lock); ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue()); + ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock); + ui->cpuopt_fastmem_exclusives->setChecked( + Settings::values.cpuopt_fastmem_exclusives.GetValue()); + ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock); + ui->cpuopt_recompile_exclusives->setChecked( + Settings::values.cpuopt_recompile_exclusives.GetValue()); } void ConfigureCpuDebug::ApplyConfiguration() { @@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() { Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); + Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked(); + Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked(); } void ConfigureCpuDebug::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index 6e635bb2f..2bc268810 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui @@ -144,7 +144,34 @@ - Enable Host MMU Emulation + Enable Host MMU Emulation (general memory instructions) + + + + + + + + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.</div> + <div style="white-space: nowrap">Disabling this forces all exclusive memory accesses to use Software MMU Emulation.</div> + + + + Enable Host MMU Emulation (exclusive memory instructions) + + + + + + + + <div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div> + <div style="white-space: nowrap">Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.</div> + + + + Enable recompilation of exclusive memory instructions diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 30963a8bb..b74411c84 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -280,11 +280,14 @@ void Config::ReadValues() { ReadSetting("Cpu", Settings::values.cpuopt_misc_ir); ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks); ReadSetting("Cpu", Settings::values.cpuopt_fastmem); + ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives); + ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan); ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check); + ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor); // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6d613bf7a..3ac1440c9 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -174,6 +174,14 @@ cpuopt_reduce_misalign_checks = # 0: Disabled, 1 (default): Enabled cpuopt_fastmem = +# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_fastmem_exclusives = + +# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access) +# 0: Disabled, 1 (default): Enabled +cpuopt_recompile_exclusives = + # Enable unfuse FMA (improve performance on CPUs without FMA) # Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. # 0: Disabled, 1 (default): Enabled @@ -199,6 +207,11 @@ cpuopt_unsafe_inaccurate_nan = # 0: Disabled, 1 (default): Enabled cpuopt_unsafe_fastmem_check = +# Enable faster exclusive instructions +# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. +# 0: Disabled, 1 (default): Enabled +cpuopt_unsafe_ignore_global_monitor = + [Renderer] # Which backend API to use. # 0 (default): OpenGL, 1: Vulkan -- cgit v1.2.3