diff options
Diffstat (limited to '')
83 files changed, 2167 insertions, 784 deletions
@@ -1,6 +1,6 @@ yuzu emulator ============= -[![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu) +[![Travis CI Build Status](https://travis-ci.com/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.com/yuzu-emu/yuzu) [![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/) yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). @@ -21,7 +21,7 @@ For development discussion, please join us on [Discord](https://discord.gg/XQV6d Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. -If you want to contribute please take a look at the [Contributor's Guide](CONTRIBUTING.md) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should as well contact any of the developers on Discord in order to know about the current state of the emulator. +If you want to contribute please take a look at the [Contributor's Guide](CONTRIBUTING.md) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should also contact any of the developers on Discord in order to know about the current state of the emulator. ### Building diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp index a58f24169..49ab9d3e1 100644 --- a/src/audio_core/algorithm/interpolate.cpp +++ b/src/audio_core/algorithm/interpolate.cpp @@ -8,13 +8,14 @@ #include <climits> #include <cmath> #include <vector> + #include "audio_core/algorithm/interpolate.h" #include "common/common_types.h" #include "common/logging/log.h" namespace AudioCore { -constexpr std::array<s16, 512> curve_lut0 = { +constexpr std::array<s16, 512> curve_lut0{ 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, @@ -56,7 +57,7 @@ constexpr std::array<s16, 512> curve_lut0 = { 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, 6479, 3, 6722, 19426, 6600}; -constexpr std::array<s16, 512> curve_lut1 = { +constexpr std::array<s16, 512> curve_lut1{ -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, @@ -98,7 +99,7 @@ constexpr std::array<s16, 512> curve_lut1 = { 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, -200, -5, 69, 32639, -68}; -constexpr std::array<s16, 512> curve_lut2 = { +constexpr std::array<s16, 512> curve_lut2{ 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, @@ -146,10 +147,10 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, if (ratio <= 0) { LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); - ratio = 1.0; + return input; } - const int step = static_cast<int>(ratio * 0x8000); + const s32 step{static_cast<s32>(ratio * 0x8000)}; const std::array<s16, 512>& lut = [step] { if (step > 0xaaaa) { return curve_lut0; @@ -160,28 +161,37 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, return curve_lut2; }(); - std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio)); - int in_offset = 0; - for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { - const int lut_index = (state.fraction >> 8) * 4; + const std::size_t num_frames{input.size() / 2}; + + std::vector<s16> output; + output.reserve(static_cast<std::size_t>(input.size() / ratio + InterpolationState::taps)); - const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + - input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + - input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] + - input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3]; + for (std::size_t frame{}; frame < num_frames; ++frame) { + const std::size_t lut_index{(state.fraction >> 8) * InterpolationState::taps}; - const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + - input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + - input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + - input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3]; + std::rotate(state.history.begin(), state.history.end() - 1, state.history.end()); + state.history[0][0] = input[frame * 2 + 0]; + state.history[0][1] = input[frame * 2 + 1]; - const int new_offset = state.fraction + step; + while (state.position <= 1.0) { + const s32 left{state.history[0][0] * lut[lut_index + 0] + + state.history[1][0] * lut[lut_index + 1] + + state.history[2][0] * lut[lut_index + 2] + + state.history[3][0] * lut[lut_index + 3]}; + const s32 right{state.history[0][1] * lut[lut_index + 0] + + state.history[1][1] * lut[lut_index + 1] + + state.history[2][1] * lut[lut_index + 2] + + state.history[3][1] * lut[lut_index + 3]}; + const s32 new_offset{state.fraction + step}; - in_offset += new_offset >> 15; - state.fraction = new_offset & 0x7fff; + state.fraction = new_offset & 0x7fff; - output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); - output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX)); + output.emplace_back(static_cast<s16>(std::clamp(left >> 15, SHRT_MIN, SHRT_MAX))); + output.emplace_back(static_cast<s16>(std::clamp(right >> 15, SHRT_MIN, SHRT_MAX))); + + state.position += ratio; + } + state.position -= 1.0; } return output; diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h index 1b9831a75..ab1a31754 100644 --- a/src/audio_core/algorithm/interpolate.h +++ b/src/audio_core/algorithm/interpolate.h @@ -6,12 +6,17 @@ #include <array> #include <vector> + #include "common/common_types.h" namespace AudioCore { struct InterpolationState { - int fraction = 0; + static constexpr std::size_t taps{4}; + static constexpr std::size_t history_size{taps * 2 - 1}; + std::array<std::array<s16, 2>, history_size> history{}; + double position{}; + s32 fraction{}; }; /// Interpolates input signal to produce output signal. diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 88c06b2ce..b31a0328c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -131,8 +131,8 @@ add_library(core STATIC frontend/framebuffer_layout.cpp frontend/framebuffer_layout.h frontend/input.h - frontend/scope_acquire_window_context.cpp - frontend/scope_acquire_window_context.h + frontend/scope_acquire_context.cpp + frontend/scope_acquire_context.h gdbstub/gdbstub.cpp gdbstub/gdbstub.h hardware_interrupt_manager.cpp @@ -595,8 +595,12 @@ endif() if (ARCHITECTURE_x86_64) target_sources(core PRIVATE - arm/dynarmic/arm_dynarmic.cpp - arm/dynarmic/arm_dynarmic.h + arm/dynarmic/arm_dynarmic_32.cpp + arm/dynarmic/arm_dynarmic_32.h + arm/dynarmic/arm_dynarmic_64.cpp + arm/dynarmic/arm_dynarmic_64.h + arm/dynarmic/arm_dynarmic_cp15.cpp + arm/dynarmic/arm_dynarmic_cp15.h ) target_link_libraries(core PRIVATE dynarmic) endif() diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 47b964eb7..57eae839e 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -25,7 +25,20 @@ public: explicit ARM_Interface(System& system_) : system{system_} {} virtual ~ARM_Interface() = default; - struct ThreadContext { + struct ThreadContext32 { + std::array<u32, 16> cpu_registers; + u32 cpsr; + std::array<u8, 4> padding; + std::array<u64, 32> fprs; + u32 fpscr; + u32 fpexc; + u32 tpidr; + }; + // Internally within the kernel, it expects the AArch32 version of the + // thread context to be 344 bytes in size. + static_assert(sizeof(ThreadContext32) == 0x158); + + struct ThreadContext64 { std::array<u64, 31> cpu_registers; u64 sp; u64 pc; @@ -38,7 +51,7 @@ public: }; // Internally within the kernel, it expects the AArch64 version of the // thread context to be 800 bytes in size. - static_assert(sizeof(ThreadContext) == 0x320); + static_assert(sizeof(ThreadContext64) == 0x320); /// Runs the CPU until an event happens virtual void Run() = 0; @@ -130,17 +143,10 @@ public: */ virtual void SetTPIDR_EL0(u64 value) = 0; - /** - * Saves the current CPU context - * @param ctx Thread context to save - */ - virtual void SaveContext(ThreadContext& ctx) = 0; - - /** - * Loads a CPU context - * @param ctx Thread context to load - */ - virtual void LoadContext(const ThreadContext& ctx) = 0; + virtual void SaveContext(ThreadContext32& ctx) = 0; + virtual void SaveContext(ThreadContext64& ctx) = 0; + virtual void LoadContext(const ThreadContext32& ctx) = 0; + virtual void LoadContext(const ThreadContext64& ctx) = 0; /// Clears the exclusive monitor's state. virtual void ClearExclusiveState() = 0; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp new file mode 100644 index 000000000..187a972ac --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -0,0 +1,208 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cinttypes> +#include <memory> +#include <dynarmic/A32/a32.h> +#include <dynarmic/A32/config.h> +#include <dynarmic/A32/context.h> +#include "common/microprofile.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/hle/kernel/svc.h" +#include "core/memory.h" + +namespace Core { + +class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { +public: + explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent) : parent(parent) {} + + u8 MemoryRead8(u32 vaddr) override { + return parent.system.Memory().Read8(vaddr); + } + u16 MemoryRead16(u32 vaddr) override { + return parent.system.Memory().Read16(vaddr); + } + u32 MemoryRead32(u32 vaddr) override { + return parent.system.Memory().Read32(vaddr); + } + u64 MemoryRead64(u32 vaddr) override { + return parent.system.Memory().Read64(vaddr); + } + + void MemoryWrite8(u32 vaddr, u8 value) override { + parent.system.Memory().Write8(vaddr, value); + } + void MemoryWrite16(u32 vaddr, u16 value) override { + parent.system.Memory().Write16(vaddr, value); + } + void MemoryWrite32(u32 vaddr, u32 value) override { + parent.system.Memory().Write32(vaddr, value); + } + void MemoryWrite64(u32 vaddr, u64 value) override { + parent.system.Memory().Write64(vaddr, value); + } + + void InterpreterFallback(u32 pc, std::size_t num_instructions) override { + UNIMPLEMENTED(); + } + + void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { + switch (exception) { + case Dynarmic::A32::Exception::UndefinedInstruction: + case Dynarmic::A32::Exception::UnpredictableInstruction: + break; + case Dynarmic::A32::Exception::Breakpoint: + break; + } + LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", + static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); + UNIMPLEMENTED(); + } + + void CallSVC(u32 swi) override { + Kernel::CallSVC(parent.system, swi); + } + + void AddTicks(u64 ticks) override { + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max<u64>(amortized_ticks, 1); + + parent.system.CoreTiming().AddTicks(amortized_ticks); + num_interpreted_instructions = 0; + } + u64 GetTicksRemaining() override { + return std::max(parent.system.CoreTiming().GetDowncount(), {}); + } + + ARM_Dynarmic_32& parent; + std::size_t num_interpreted_instructions{}; + u64 tpidrro_el0{}; + u64 tpidr_el0{}; +}; + +std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { + Dynarmic::A32::UserConfig config; + config.callbacks = cb.get(); + // TODO(bunnei): Implement page table for 32-bit + // config.page_table = &page_table.pointers; + config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); + config.define_unpredictable_behaviour = true; + return std::make_unique<Dynarmic::A32::Jit>(config); +} + +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); + +void ARM_Dynarmic_32::Run() { + MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32); + jit->Run(); +} + +void ARM_Dynarmic_32::Step() { + cb->InterpreterFallback(jit->Regs()[15], 1); +} + +ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) + : ARM_Interface{system}, + cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, + exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} + +ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; + +void ARM_Dynarmic_32::SetPC(u64 pc) { + jit->Regs()[15] = static_cast<u32>(pc); +} + +u64 ARM_Dynarmic_32::GetPC() const { + return jit->Regs()[15]; +} + +u64 ARM_Dynarmic_32::GetReg(int index) const { + return jit->Regs()[index]; +} + +void ARM_Dynarmic_32::SetReg(int index, u64 value) { + jit->Regs()[index] = static_cast<u32>(value); +} + +u128 ARM_Dynarmic_32::GetVectorReg(int index) const { + return {}; +} + +void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {} + +u32 ARM_Dynarmic_32::GetPSTATE() const { + return jit->Cpsr(); +} + +void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { + jit->SetCpsr(cpsr); +} + +u64 ARM_Dynarmic_32::GetTlsAddress() const { + return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; +} + +void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { + CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); +} + +u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { + return cb->tpidr_el0; +} + +void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { + cb->tpidr_el0 = value; +} + +void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { + Dynarmic::A32::Context context; + jit->SaveContext(context); + ctx.cpu_registers = context.Regs(); + ctx.cpsr = context.Cpsr(); +} + +void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) { + Dynarmic::A32::Context context; + context.Regs() = ctx.cpu_registers; + context.SetCpsr(ctx.cpsr); + jit->LoadContext(context); +} + +void ARM_Dynarmic_32::PrepareReschedule() { + jit->HaltExecution(); +} + +void ARM_Dynarmic_32::ClearInstructionCache() { + jit->ClearCache(); +} + +void ARM_Dynarmic_32::ClearExclusiveState() {} + +void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + auto key = std::make_pair(&page_table, new_address_space_size_in_bits); + auto iter = jit_cache.find(key); + if (iter != jit_cache.end()) { + jit = iter->second; + return; + } + jit = MakeJit(page_table, new_address_space_size_in_bits); + jit_cache.emplace(key, jit); +} + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h new file mode 100644 index 000000000..143e46e4d --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> + +#include <dynarmic/A32/a32.h> +#include <dynarmic/A64/a64.h> +#include <dynarmic/A64/exclusive_monitor.h> +#include "common/common_types.h" +#include "common/hash.h" +#include "core/arm/arm_interface.h" +#include "core/arm/exclusive_monitor.h" + +namespace Memory { +class Memory; +} + +namespace Core { + +class DynarmicCallbacks32; +class DynarmicExclusiveMonitor; +class System; + +class ARM_Dynarmic_32 final : public ARM_Interface { +public: + ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ~ARM_Dynarmic_32() override; + + void SetPC(u64 pc) override; + u64 GetPC() const override; + u64 GetReg(int index) const override; + void SetReg(int index, u64 value) override; + u128 GetVectorReg(int index) const override; + void SetVectorReg(int index, u128 value) override; + u32 GetPSTATE() const override; + void SetPSTATE(u32 pstate) override; + void Run() override; + void Step() override; + VAddr GetTlsAddress() const override; + void SetTlsAddress(VAddr address) override; + void SetTPIDR_EL0(u64 value) override; + u64 GetTPIDR_EL0() const override; + + void SaveContext(ThreadContext32& ctx) override; + void SaveContext(ThreadContext64& ctx) override {} + void LoadContext(const ThreadContext32& ctx) override; + void LoadContext(const ThreadContext64& ctx) override {} + + void PrepareReschedule() override; + void ClearExclusiveState() override; + + void ClearInstructionCache() override; + void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) override; + +private: + std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const; + + using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; + using JitCacheType = + std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; + + friend class DynarmicCallbacks32; + std::unique_ptr<DynarmicCallbacks32> cb; + JitCacheType jit_cache; + std::shared_ptr<Dynarmic::A32::Jit> jit; + std::size_t core_index; + DynarmicExclusiveMonitor& exclusive_monitor; + std::array<u32, 84> CP15_regs{}; +}; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 29eaf74e5..a53a58ba0 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -8,7 +8,7 @@ #include <dynarmic/A64/config.h> #include "common/logging/log.h" #include "common/microprofile.h" -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/core.h" #include "core/core_manager.h" #include "core/core_timing.h" @@ -25,9 +25,9 @@ namespace Core { using Vector = Dynarmic::A64::Vector; -class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks { +class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { public: - explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {} + explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent) : parent(parent) {} u8 MemoryRead8(u64 vaddr) override { return parent.system.Memory().Read8(vaddr); @@ -68,7 +68,7 @@ public: LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, num_instructions, MemoryReadCode(pc)); - ARM_Interface::ThreadContext ctx; + ARM_Interface::ThreadContext64 ctx; parent.SaveContext(ctx); parent.inner_unicorn.LoadContext(ctx); parent.inner_unicorn.ExecuteInstructions(num_instructions); @@ -90,7 +90,7 @@ public: parent.jit->HaltExecution(); parent.SetPC(pc); Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); - parent.SaveContext(thread->GetContext()); + parent.SaveContext(thread->GetContext64()); GDBStub::Break(); GDBStub::SendTrap(thread, 5); return; @@ -126,14 +126,14 @@ public: return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); } - ARM_Dynarmic& parent; + ARM_Dynarmic_64& parent; std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; }; -std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, - std::size_t address_space_bits) const { +std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, + std::size_t address_space_bits) const { Dynarmic::A64::UserConfig config; // Callbacks @@ -159,79 +159,79 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag // Unpredictable instructions config.define_unpredictable_behaviour = true; - return std::make_unique<Dynarmic::A64::Jit>(config); + return std::make_shared<Dynarmic::A64::Jit>(config); } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); -void ARM_Dynarmic::Run() { - MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); +void ARM_Dynarmic_64::Run() { + MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64); jit->Run(); } -void ARM_Dynarmic::Step() { +void ARM_Dynarmic_64::Step() { cb->InterpreterFallback(jit->GetPC(), 1); } -ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, - std::size_t core_index) +ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, + std::size_t core_index) : ARM_Interface{system}, - cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, + cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system}, core_index{core_index}, exclusive_monitor{ dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} -ARM_Dynarmic::~ARM_Dynarmic() = default; +ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; -void ARM_Dynarmic::SetPC(u64 pc) { +void ARM_Dynarmic_64::SetPC(u64 pc) { jit->SetPC(pc); } -u64 ARM_Dynarmic::GetPC() const { +u64 ARM_Dynarmic_64::GetPC() const { return jit->GetPC(); } -u64 ARM_Dynarmic::GetReg(int index) const { +u64 ARM_Dynarmic_64::GetReg(int index) const { return jit->GetRegister(index); } -void ARM_Dynarmic::SetReg(int index, u64 value) { +void ARM_Dynarmic_64::SetReg(int index, u64 value) { jit->SetRegister(index, value); } -u128 ARM_Dynarmic::GetVectorReg(int index) const { +u128 ARM_Dynarmic_64::GetVectorReg(int index) const { return jit->GetVector(index); } -void ARM_Dynarmic::SetVectorReg(int index, u128 value) { +void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) { jit->SetVector(index, value); } -u32 ARM_Dynarmic::GetPSTATE() const { +u32 ARM_Dynarmic_64::GetPSTATE() const { return jit->GetPstate(); } -void ARM_Dynarmic::SetPSTATE(u32 pstate) { +void ARM_Dynarmic_64::SetPSTATE(u32 pstate) { jit->SetPstate(pstate); } -u64 ARM_Dynarmic::GetTlsAddress() const { +u64 ARM_Dynarmic_64::GetTlsAddress() const { return cb->tpidrro_el0; } -void ARM_Dynarmic::SetTlsAddress(VAddr address) { +void ARM_Dynarmic_64::SetTlsAddress(VAddr address) { cb->tpidrro_el0 = address; } -u64 ARM_Dynarmic::GetTPIDR_EL0() const { +u64 ARM_Dynarmic_64::GetTPIDR_EL0() const { return cb->tpidr_el0; } -void ARM_Dynarmic::SetTPIDR_EL0(u64 value) { +void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { cb->tpidr_el0 = value; } -void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { +void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) { ctx.cpu_registers = jit->GetRegisters(); ctx.sp = jit->GetSP(); ctx.pc = jit->GetPC(); @@ -242,7 +242,7 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { ctx.tpidr = cb->tpidr_el0; } -void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { +void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) { jit->SetRegisters(ctx.cpu_registers); jit->SetSP(ctx.sp); jit->SetPC(ctx.pc); @@ -253,25 +253,32 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { SetTPIDR_EL0(ctx.tpidr); } -void ARM_Dynarmic::PrepareReschedule() { +void ARM_Dynarmic_64::PrepareReschedule() { jit->HaltExecution(); } -void ARM_Dynarmic::ClearInstructionCache() { +void ARM_Dynarmic_64::ClearInstructionCache() { jit->ClearCache(); } -void ARM_Dynarmic::ClearExclusiveState() { +void ARM_Dynarmic_64::ClearExclusiveState() { jit->ClearExclusiveState(); } -void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, - std::size_t new_address_space_size_in_bits) { +void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + auto key = std::make_pair(&page_table, new_address_space_size_in_bits); + auto iter = jit_cache.find(key); + if (iter != jit_cache.end()) { + jit = iter->second; + return; + } jit = MakeJit(page_table, new_address_space_size_in_bits); + jit_cache.emplace(key, jit); } -DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count) - : monitor(core_count), memory{memory_} {} +DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) + : monitor(core_count), memory{memory} {} DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 9cd475cfb..e71240a96 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -5,9 +5,12 @@ #pragma once #include <memory> +#include <unordered_map> + #include <dynarmic/A64/a64.h> #include <dynarmic/A64/exclusive_monitor.h> #include "common/common_types.h" +#include "common/hash.h" #include "core/arm/arm_interface.h" #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" @@ -18,14 +21,14 @@ class Memory; namespace Core { -class ARM_Dynarmic_Callbacks; +class DynarmicCallbacks64; class DynarmicExclusiveMonitor; class System; -class ARM_Dynarmic final : public ARM_Interface { +class ARM_Dynarmic_64 final : public ARM_Interface { public: - ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); - ~ARM_Dynarmic() override; + ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ~ARM_Dynarmic_64() override; void SetPC(u64 pc) override; u64 GetPC() const override; @@ -42,8 +45,10 @@ public: void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void SaveContext(ThreadContext& ctx) override; - void LoadContext(const ThreadContext& ctx) override; + void SaveContext(ThreadContext32& ctx) override {} + void SaveContext(ThreadContext64& ctx) override; + void LoadContext(const ThreadContext32& ctx) override {} + void LoadContext(const ThreadContext64& ctx) override; void PrepareReschedule() override; void ClearExclusiveState() override; @@ -53,12 +58,17 @@ public: std::size_t new_address_space_size_in_bits) override; private: - std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, + std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, std::size_t address_space_bits) const; - friend class ARM_Dynarmic_Callbacks; - std::unique_ptr<ARM_Dynarmic_Callbacks> cb; - std::unique_ptr<Dynarmic::A64::Jit> jit; + using JitCacheKey = std::pair<Common::PageTable*, std::size_t>; + using JitCacheType = + std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>; + + friend class DynarmicCallbacks64; + std::unique_ptr<DynarmicCallbacks64> cb; + JitCacheType jit_cache; + std::shared_ptr<Dynarmic::A64::Jit> jit; ARM_Unicorn inner_unicorn; std::size_t core_index; @@ -67,7 +77,7 @@ private: class DynarmicExclusiveMonitor final : public ExclusiveMonitor { public: - explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count); + explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); ~DynarmicExclusiveMonitor() override; void SetExclusive(std::size_t core_index, VAddr addr) override; @@ -80,7 +90,7 @@ public: bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; private: - friend class ARM_Dynarmic; + friend class ARM_Dynarmic_64; Dynarmic::A64::ExclusiveMonitor monitor; Memory::Memory& memory; }; diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp new file mode 100644 index 000000000..3fdcdebde --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -0,0 +1,80 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/arm/dynarmic/arm_dynarmic_cp15.h" + +using Callback = Dynarmic::A32::Coprocessor::Callback; +using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; +using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; + +std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, + CoprocReg CRd, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + return {}; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { + // This is a dummy write, we ignore the value written here. + return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; + } + + if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { + switch (opc2) { + case 4: + // This is a dummy write, we ignore the value written here. + return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; + case 5: + // This is a dummy write, we ignore the value written here. + return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; + default: + return {}; + } + } + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + } + + return {}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return {}; +} + +CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) { + // TODO(merry): Privileged CP15 registers + + if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { + switch (opc2) { + case 2: + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + case 3: + return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; + default: + return {}; + } + } + + return {}; +} + +CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { + return {}; +} + +std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) { + return {}; +} + +std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) { + return {}; +} diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h new file mode 100644 index 000000000..07bcde5f9 --- /dev/null +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -0,0 +1,152 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> + +#include <dynarmic/A32/coprocessor.h> +#include "common/common_types.h" + +enum class CP15Register { + // c0 - Information registers + CP15_MAIN_ID, + CP15_CACHE_TYPE, + CP15_TCM_STATUS, + CP15_TLB_TYPE, + CP15_CPU_ID, + CP15_PROCESSOR_FEATURE_0, + CP15_PROCESSOR_FEATURE_1, + CP15_DEBUG_FEATURE_0, + CP15_AUXILIARY_FEATURE_0, + CP15_MEMORY_MODEL_FEATURE_0, + CP15_MEMORY_MODEL_FEATURE_1, + CP15_MEMORY_MODEL_FEATURE_2, + CP15_MEMORY_MODEL_FEATURE_3, + CP15_ISA_FEATURE_0, + CP15_ISA_FEATURE_1, + CP15_ISA_FEATURE_2, + CP15_ISA_FEATURE_3, + CP15_ISA_FEATURE_4, + + // c1 - Control registers + CP15_CONTROL, + CP15_AUXILIARY_CONTROL, + CP15_COPROCESSOR_ACCESS_CONTROL, + + // c2 - Translation table registers + CP15_TRANSLATION_BASE_TABLE_0, + CP15_TRANSLATION_BASE_TABLE_1, + CP15_TRANSLATION_BASE_CONTROL, + CP15_DOMAIN_ACCESS_CONTROL, + CP15_RESERVED, + + // c5 - Fault status registers + CP15_FAULT_STATUS, + CP15_INSTR_FAULT_STATUS, + CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, + CP15_INST_FSR, + + // c6 - Fault Address registers + CP15_FAULT_ADDRESS, + CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, + CP15_WFAR, + CP15_IFAR, + + // c7 - Cache operation registers + CP15_WAIT_FOR_INTERRUPT, + CP15_PHYS_ADDRESS, + CP15_INVALIDATE_INSTR_CACHE, + CP15_INVALIDATE_INSTR_CACHE_USING_MVA, + CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, + CP15_FLUSH_PREFETCH_BUFFER, + CP15_FLUSH_BRANCH_TARGET_CACHE, + CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, + CP15_INVALIDATE_DATA_CACHE, + CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, + CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, + CP15_INVALIDATE_DATA_AND_INSTR_CACHE, + CP15_CLEAN_DATA_CACHE, + CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, + CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, + CP15_DATA_SYNC_BARRIER, + CP15_DATA_MEMORY_BARRIER, + CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, + CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, + CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, + + // c8 - TLB operations + CP15_INVALIDATE_ITLB, + CP15_INVALIDATE_ITLB_SINGLE_ENTRY, + CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, + CP15_INVALIDATE_DTLB, + CP15_INVALIDATE_DTLB_SINGLE_ENTRY, + CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, + CP15_INVALIDATE_UTLB, + CP15_INVALIDATE_UTLB_SINGLE_ENTRY, + CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, + CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, + + // c9 - Data cache lockdown register + CP15_DATA_CACHE_LOCKDOWN, + + // c10 - TLB/Memory map registers + CP15_TLB_LOCKDOWN, + CP15_PRIMARY_REGION_REMAP, + CP15_NORMAL_REGION_REMAP, + + // c13 - Thread related registers + CP15_PID, + CP15_CONTEXT_ID, + CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write + CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) + CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. + + // c15 - Performance and TLB lockdown registers + CP15_PERFORMANCE_MONITOR_CONTROL, + CP15_CYCLE_COUNTER, + CP15_COUNT_0, + CP15_COUNT_1, + CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, + CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, + CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, + CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, + CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, + CP15_TLB_DEBUG_CONTROL, + + // Skyeye defined + CP15_TLB_FAULT_ADDR, + CP15_TLB_FAULT_STATUS, + + // Not an actual register. + // All registers should be defined above this. + CP15_REGISTER_COUNT, +}; + +class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { +public: + using CoprocReg = Dynarmic::A32::CoprocReg; + + explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; + + std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, + CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, + CoprocReg CRm, unsigned opc2) override; + CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, + unsigned opc2) override; + CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override; + std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) override; + std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, + std::optional<u8> option) override; + +private: + u32* CP15{}; +}; diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index 94570e520..b32401e0b 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #endif #include "core/arm/exclusive_monitor.h" #include "core/memory.h" diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index f99ad5802..8a9800a96 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -53,7 +53,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si void* user_data) { auto* const system = static_cast<System*>(user_data); - ARM_Interface::ThreadContext ctx{}; + ARM_Interface::ThreadContext64 ctx{}; system->CurrentArmInterface().SaveContext(ctx); ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, ctx.pc, ctx.cpu_registers[30]); @@ -179,7 +179,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { } Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); - SaveContext(thread->GetContext()); + SaveContext(thread->GetContext64()); if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { last_bkpt_hit = false; GDBStub::Break(); @@ -188,7 +188,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { } } -void ARM_Unicorn::SaveContext(ThreadContext& ctx) { +void ARM_Unicorn::SaveContext(ThreadContext64& ctx) { int uregs[32]; void* tregs[32]; @@ -215,7 +215,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) { CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); } -void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { +void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) { int uregs[32]; void* tregs[32]; diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 3c5b155f9..f30d13cb6 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -30,8 +30,6 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void SaveContext(ThreadContext& ctx) override; - void LoadContext(const ThreadContext& ctx) override; void PrepareReschedule() override; void ClearExclusiveState() override; void ExecuteInstructions(std::size_t num_instructions); @@ -41,6 +39,11 @@ public: void PageTableChanged(Common::PageTable&, std::size_t) override {} void RecordBreak(GDBStub::BreakpointAddress bkpt); + void SaveContext(ThreadContext32& ctx) override {} + void SaveContext(ThreadContext64& ctx) override; + void LoadContext(const ThreadContext32& ctx) override {} + void LoadContext(const ThreadContext64& ctx) override; + private: static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); diff --git a/src/core/core.cpp b/src/core/core.cpp index 86e314c94..a82faf127 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -24,6 +24,7 @@ #include "core/file_sys/sdmc_factory.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" +#include "core/frontend/scope_acquire_context.h" #include "core/gdbstub/gdbstub.h" #include "core/hardware_interrupt_manager.h" #include "core/hle/kernel/client_port.h" @@ -184,6 +185,8 @@ struct System::Impl { ResultStatus Load(System& system, Frontend::EmuWindow& emu_window, const std::string& filepath) { + Core::Frontend::ScopeAcquireContext acquire_context{emu_window}; + app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath)); if (!app_loader) { LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath); diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp index 8eacf92dd..b6b797c80 100644 --- a/src/core/core_manager.cpp +++ b/src/core/core_manager.cpp @@ -6,9 +6,6 @@ #include <mutex> #include "common/logging/log.h" -#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" -#endif #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 3376eedc5..5eb87fb63 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -26,9 +26,6 @@ public: /// Releases (dunno if this is the "right" word) the context from the caller thread virtual void DoneCurrent() = 0; - - /// Swap buffers to display the next frame - virtual void SwapBuffers() = 0; }; /** diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index 1d39c1faf..e9d0a40d3 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -29,6 +29,7 @@ enum class AspectRatio { struct FramebufferLayout { u32 width{ScreenUndocked::Width}; u32 height{ScreenUndocked::Height}; + bool is_srgb{}; Common::Rectangle<u32> screen; diff --git a/src/core/frontend/scope_acquire_context.cpp b/src/core/frontend/scope_acquire_context.cpp new file mode 100644 index 000000000..878c3157c --- /dev/null +++ b/src/core/frontend/scope_acquire_context.cpp @@ -0,0 +1,18 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/frontend/emu_window.h" +#include "core/frontend/scope_acquire_context.h" + +namespace Core::Frontend { + +ScopeAcquireContext::ScopeAcquireContext(Core::Frontend::GraphicsContext& context) + : context{context} { + context.MakeCurrent(); +} +ScopeAcquireContext::~ScopeAcquireContext() { + context.DoneCurrent(); +} + +} // namespace Core::Frontend diff --git a/src/core/frontend/scope_acquire_window_context.h b/src/core/frontend/scope_acquire_context.h index 2d9f6e825..7a65c0623 100644 --- a/src/core/frontend/scope_acquire_window_context.h +++ b/src/core/frontend/scope_acquire_context.h @@ -8,16 +8,16 @@ namespace Core::Frontend { -class EmuWindow; +class GraphicsContext; /// Helper class to acquire/release window context within a given scope -class ScopeAcquireWindowContext : NonCopyable { +class ScopeAcquireContext : NonCopyable { public: - explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window); - ~ScopeAcquireWindowContext(); + explicit ScopeAcquireContext(Core::Frontend::GraphicsContext& context); + ~ScopeAcquireContext(); private: - Core::Frontend::EmuWindow& emu_window; + Core::Frontend::GraphicsContext& context; }; } // namespace Core::Frontend diff --git a/src/core/frontend/scope_acquire_window_context.cpp b/src/core/frontend/scope_acquire_window_context.cpp deleted file mode 100644 index 3663dad17..000000000 --- a/src/core/frontend/scope_acquire_window_context.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "core/frontend/emu_window.h" -#include "core/frontend/scope_acquire_window_context.h" - -namespace Core::Frontend { - -ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_) - : emu_window{emu_window_} { - emu_window.MakeCurrent(); -} -ScopeAcquireWindowContext::~ScopeAcquireWindowContext() { - emu_window.DoneCurrent(); -} - -} // namespace Core::Frontend diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 67e95999d..e8d8871a7 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -217,7 +217,7 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) { return 0; } - const auto& thread_context = thread->GetContext(); + const auto& thread_context = thread->GetContext64(); if (id < SP_REGISTER) { return thread_context.cpu_registers[id]; @@ -239,7 +239,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr) return; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id < SP_REGISTER) { thread_context.cpu_registers[id] = val; @@ -259,7 +259,7 @@ static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) { return u128{0}; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; @@ -275,7 +275,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr) return; } - auto& thread_context = thread->GetContext(); + auto& thread_context = thread->GetContext64(); if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; @@ -916,7 +916,7 @@ static void WriteRegister() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); SendReply("OK"); } @@ -947,7 +947,7 @@ static void WriteRegisters() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); SendReply("OK"); } @@ -1019,7 +1019,7 @@ static void Step() { // Update ARM context, skipping scheduler - no running threads at this point Core::System::GetInstance() .ArmInterface(current_core) - .LoadContext(current_thread->GetContext()); + .LoadContext(current_thread->GetContext64()); } step_loop = true; halt_loop = true; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 9232f4d7e..e47f1deed 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -186,6 +186,10 @@ struct KernelCore::Impl { return; } + for (auto& core : cores) { + core.SetIs64Bit(process->Is64BitProcess()); + } + system.Memory().SetCurrentPageTable(*process); } diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 9303dd273..aa2787467 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -5,7 +5,8 @@ #include "common/logging/log.h" #include "core/arm/arm_interface.h" #ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" #endif #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" @@ -20,13 +21,17 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor) : core_index{id} { #ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); + arm_interface_32 = + std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index); + arm_interface_64 = + std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index); + #else arm_interface = std::make_shared<Core::ARM_Unicorn>(system); LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif - scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); + scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); } PhysicalCore::~PhysicalCore() = default; @@ -48,4 +53,12 @@ void PhysicalCore::Shutdown() { scheduler->Shutdown(); } +void PhysicalCore::SetIs64Bit(bool is_64_bit) { + if (is_64_bit) { + arm_interface = arm_interface_64.get(); + } else { + arm_interface = arm_interface_32.get(); + } +} + } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 4c32c0f1b..3269166be 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -68,10 +68,14 @@ public: return *scheduler; } + void SetIs64Bit(bool is_64_bit); + private: std::size_t core_index; - std::unique_ptr<Core::ARM_Interface> arm_interface; + std::unique_ptr<Core::ARM_Interface> arm_interface_32; + std::unique_ptr<Core::ARM_Interface> arm_interface_64; std::unique_ptr<Kernel::Scheduler> scheduler; + Core::ARM_Interface* arm_interface{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 2fcb7326c..edc414d69 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -42,7 +42,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { // Register 1 must be a handle to the main thread const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); - thread->GetContext().cpu_registers[1] = thread_handle; + thread->GetContext32().cpu_registers[1] = thread_handle; + thread->GetContext64().cpu_registers[1] = thread_handle; // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires thread->ResumeFromWait(); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index c65f82fb7..1140c72a3 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -383,8 +383,8 @@ void GlobalScheduler::Unlock() { // TODO(Blinkhawk): Setup the interrupts and change context on current core. } -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) - : system(system), cpu_core(cpu_core), core_id(core_id) {} +Scheduler::Scheduler(Core::System& system, std::size_t core_id) + : system{system}, core_id{core_id} {} Scheduler::~Scheduler() = default; @@ -422,9 +422,10 @@ void Scheduler::UnloadThread() { // Save context for previous thread if (previous_thread) { - cpu_core.SaveContext(previous_thread->GetContext()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread @@ -451,9 +452,10 @@ void Scheduler::SwitchContext() { // Save context for previous thread if (previous_thread) { - cpu_core.SaveContext(previous_thread->GetContext()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); + system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread @@ -481,9 +483,10 @@ void Scheduler::SwitchContext() { system.Kernel().MakeCurrentProcess(thread_owner_process); } - cpu_core.LoadContext(new_thread->GetContext()); - cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); - cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); + system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); + system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); + system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); + system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); } else { current_thread = nullptr; // Note: We do not reset the current process and current page table when idling because diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1c93a838c..07df33f9c 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -181,7 +181,7 @@ private: class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id); + explicit Scheduler(Core::System& system, std::size_t core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. @@ -235,7 +235,6 @@ private: std::shared_ptr<Thread> selected_thread = nullptr; Core::System& system; - Core::ARM_Interface& cpu_core; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; const std::size_t core_id; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index fd91779a3..4ffc113c2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -187,6 +187,13 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s return RESULT_SUCCESS; } +static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_size) { + VAddr temp_heap_addr{}; + const ResultCode result{SetHeapSize(system, &temp_heap_addr, heap_size)}; + *heap_addr = static_cast<u32>(temp_heap_addr); + return result; +} + static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); @@ -371,6 +378,12 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, return RESULT_SUCCESS; } +static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle, + u32 port_name_address) { + + return ConnectToNamedPort(system, out_handle, port_name_address); +} + /// Makes a blocking IPC call to an OS service. static ResultCode SendSyncRequest(Core::System& system, Handle handle) { const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); @@ -390,6 +403,10 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { return session->SendSyncRequest(SharedFrom(thread), system.Memory()); } +static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { + return SendSyncRequest(system, handle); +} + /// Get the ID for the specified thread. static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); @@ -405,6 +422,17 @@ static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle threa return RESULT_SUCCESS; } +static ResultCode GetThreadId32(Core::System& system, u32* thread_id_low, u32* thread_id_high, + Handle thread_handle) { + u64 thread_id{}; + const ResultCode result{GetThreadId(system, &thread_id, thread_handle)}; + + *thread_id_low = static_cast<u32>(thread_id >> 32); + *thread_id_high = static_cast<u32>(thread_id & std::numeric_limits<u32>::max()); + + return result; +} + /// Gets the ID of the specified process or a specified thread's owning process. static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); @@ -479,6 +507,12 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr return result; } +static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address, + s32 handle_count, u32 timeout_high, Handle* index) { + const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)}; + return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds); +} + /// Resumes a thread waiting on WaitSynchronization static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); @@ -917,6 +951,18 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha } } +static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low, + u32 info_id, u32 handle, u32 sub_id_high) { + const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))}; + u64 res_value{}; + + const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)}; + *result_high = static_cast<u32>(res_value >> 32); + *result_low = static_cast<u32>(res_value & std::numeric_limits<u32>::max()); + + return result; +} + /// Maps memory at a desired address static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); @@ -1058,7 +1104,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H return ERR_BUSY; } - Core::ARM_Interface::ThreadContext ctx = thread->GetContext(); + Core::ARM_Interface::ThreadContext64 ctx = thread->GetContext64(); // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. ctx.pstate &= 0xFF0FFE20; @@ -1088,6 +1134,10 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle return RESULT_SUCCESS; } +static ResultCode GetThreadPriority32(Core::System& system, u32* priority, Handle handle) { + return GetThreadPriority(system, priority, handle); +} + /// Sets the priority for the specified thread static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { LOG_TRACE(Kernel_SVC, "called"); @@ -1259,6 +1309,11 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address, query_address); } +static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address, + u32 page_info_address, u32 query_address) { + return QueryMemory(system, memory_info_address, page_info_address, query_address); +} + static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, u64 src_address, u64 size) { LOG_DEBUG(Kernel_SVC, @@ -1675,6 +1730,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ } } +static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) { + SignalProcessWideKey(system, condition_variable_addr, target); +} + // Wait for an address (via Address Arbiter) static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, s64 timeout) { @@ -1760,6 +1819,10 @@ static ResultCode CloseHandle(Core::System& system, Handle handle) { return handle_table.Close(handle); } +static ResultCode CloseHandle32(Core::System& system, Handle handle) { + return CloseHandle(system, handle); +} + /// Clears the signaled state of an event or process. static ResultCode ResetSignal(Core::System& system, Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); @@ -2317,69 +2380,196 @@ struct FunctionDef { }; } // namespace -static const FunctionDef SVC_Table[] = { +static const FunctionDef SVC_Table_32[] = { {0x00, nullptr, "Unknown"}, - {0x01, SvcWrap<SetHeapSize>, "SetHeapSize"}, - {0x02, SvcWrap<SetMemoryPermission>, "SetMemoryPermission"}, - {0x03, SvcWrap<SetMemoryAttribute>, "SetMemoryAttribute"}, - {0x04, SvcWrap<MapMemory>, "MapMemory"}, - {0x05, SvcWrap<UnmapMemory>, "UnmapMemory"}, - {0x06, SvcWrap<QueryMemory>, "QueryMemory"}, - {0x07, SvcWrap<ExitProcess>, "ExitProcess"}, - {0x08, SvcWrap<CreateThread>, "CreateThread"}, - {0x09, SvcWrap<StartThread>, "StartThread"}, - {0x0A, SvcWrap<ExitThread>, "ExitThread"}, - {0x0B, SvcWrap<SleepThread>, "SleepThread"}, - {0x0C, SvcWrap<GetThreadPriority>, "GetThreadPriority"}, - {0x0D, SvcWrap<SetThreadPriority>, "SetThreadPriority"}, - {0x0E, SvcWrap<GetThreadCoreMask>, "GetThreadCoreMask"}, - {0x0F, SvcWrap<SetThreadCoreMask>, "SetThreadCoreMask"}, - {0x10, SvcWrap<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, - {0x11, SvcWrap<SignalEvent>, "SignalEvent"}, - {0x12, SvcWrap<ClearEvent>, "ClearEvent"}, - {0x13, SvcWrap<MapSharedMemory>, "MapSharedMemory"}, - {0x14, SvcWrap<UnmapSharedMemory>, "UnmapSharedMemory"}, - {0x15, SvcWrap<CreateTransferMemory>, "CreateTransferMemory"}, - {0x16, SvcWrap<CloseHandle>, "CloseHandle"}, - {0x17, SvcWrap<ResetSignal>, "ResetSignal"}, - {0x18, SvcWrap<WaitSynchronization>, "WaitSynchronization"}, - {0x19, SvcWrap<CancelSynchronization>, "CancelSynchronization"}, - {0x1A, SvcWrap<ArbitrateLock>, "ArbitrateLock"}, - {0x1B, SvcWrap<ArbitrateUnlock>, "ArbitrateUnlock"}, - {0x1C, SvcWrap<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, - {0x1D, SvcWrap<SignalProcessWideKey>, "SignalProcessWideKey"}, - {0x1E, SvcWrap<GetSystemTick>, "GetSystemTick"}, - {0x1F, SvcWrap<ConnectToNamedPort>, "ConnectToNamedPort"}, + {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"}, + {0x02, nullptr, "Unknown"}, + {0x03, nullptr, "SetMemoryAttribute32"}, + {0x04, nullptr, "MapMemory32"}, + {0x05, nullptr, "UnmapMemory32"}, + {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"}, + {0x07, nullptr, "ExitProcess32"}, + {0x08, nullptr, "CreateThread32"}, + {0x09, nullptr, "StartThread32"}, + {0x0a, nullptr, "ExitThread32"}, + {0x0b, nullptr, "SleepThread32"}, + {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"}, + {0x0d, nullptr, "SetThreadPriority32"}, + {0x0e, nullptr, "GetThreadCoreMask32"}, + {0x0f, nullptr, "SetThreadCoreMask32"}, + {0x10, nullptr, "GetCurrentProcessorNumber32"}, + {0x11, nullptr, "SignalEvent32"}, + {0x12, nullptr, "ClearEvent32"}, + {0x13, nullptr, "MapSharedMemory32"}, + {0x14, nullptr, "UnmapSharedMemory32"}, + {0x15, nullptr, "CreateTransferMemory32"}, + {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"}, + {0x17, nullptr, "ResetSignal32"}, + {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"}, + {0x19, nullptr, "CancelSynchronization32"}, + {0x1a, nullptr, "ArbitrateLock32"}, + {0x1b, nullptr, "ArbitrateUnlock32"}, + {0x1c, nullptr, "WaitProcessWideKeyAtomic32"}, + {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"}, + {0x1e, nullptr, "GetSystemTick32"}, + {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"}, + {0x20, nullptr, "Unknown"}, + {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"}, + {0x22, nullptr, "SendSyncRequestWithUserBuffer32"}, + {0x23, nullptr, "Unknown"}, + {0x24, nullptr, "GetProcessId32"}, + {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"}, + {0x26, nullptr, "Break32"}, + {0x27, nullptr, "OutputDebugString32"}, + {0x28, nullptr, "Unknown"}, + {0x29, SvcWrap32<GetInfo32>, "GetInfo32"}, + {0x2a, nullptr, "Unknown"}, + {0x2b, nullptr, "Unknown"}, + {0x2c, nullptr, "MapPhysicalMemory32"}, + {0x2d, nullptr, "UnmapPhysicalMemory32"}, + {0x2e, nullptr, "Unknown"}, + {0x2f, nullptr, "Unknown"}, + {0x30, nullptr, "Unknown"}, + {0x31, nullptr, "Unknown"}, + {0x32, nullptr, "SetThreadActivity32"}, + {0x33, nullptr, "GetThreadContext32"}, + {0x34, nullptr, "WaitForAddress32"}, + {0x35, nullptr, "SignalToAddress32"}, + {0x36, nullptr, "Unknown"}, + {0x37, nullptr, "Unknown"}, + {0x38, nullptr, "Unknown"}, + {0x39, nullptr, "Unknown"}, + {0x3a, nullptr, "Unknown"}, + {0x3b, nullptr, "Unknown"}, + {0x3c, nullptr, "Unknown"}, + {0x3d, nullptr, "Unknown"}, + {0x3e, nullptr, "Unknown"}, + {0x3f, nullptr, "Unknown"}, + {0x40, nullptr, "CreateSession32"}, + {0x41, nullptr, "AcceptSession32"}, + {0x42, nullptr, "Unknown"}, + {0x43, nullptr, "ReplyAndReceive32"}, + {0x44, nullptr, "Unknown"}, + {0x45, nullptr, "CreateEvent32"}, + {0x46, nullptr, "Unknown"}, + {0x47, nullptr, "Unknown"}, + {0x48, nullptr, "Unknown"}, + {0x49, nullptr, "Unknown"}, + {0x4a, nullptr, "Unknown"}, + {0x4b, nullptr, "Unknown"}, + {0x4c, nullptr, "Unknown"}, + {0x4d, nullptr, "Unknown"}, + {0x4e, nullptr, "Unknown"}, + {0x4f, nullptr, "Unknown"}, + {0x50, nullptr, "Unknown"}, + {0x51, nullptr, "Unknown"}, + {0x52, nullptr, "Unknown"}, + {0x53, nullptr, "Unknown"}, + {0x54, nullptr, "Unknown"}, + {0x55, nullptr, "Unknown"}, + {0x56, nullptr, "Unknown"}, + {0x57, nullptr, "Unknown"}, + {0x58, nullptr, "Unknown"}, + {0x59, nullptr, "Unknown"}, + {0x5a, nullptr, "Unknown"}, + {0x5b, nullptr, "Unknown"}, + {0x5c, nullptr, "Unknown"}, + {0x5d, nullptr, "Unknown"}, + {0x5e, nullptr, "Unknown"}, + {0x5F, nullptr, "FlushProcessDataCache32"}, + {0x60, nullptr, "Unknown"}, + {0x61, nullptr, "Unknown"}, + {0x62, nullptr, "Unknown"}, + {0x63, nullptr, "Unknown"}, + {0x64, nullptr, "Unknown"}, + {0x65, nullptr, "GetProcessList32"}, + {0x66, nullptr, "Unknown"}, + {0x67, nullptr, "Unknown"}, + {0x68, nullptr, "Unknown"}, + {0x69, nullptr, "Unknown"}, + {0x6A, nullptr, "Unknown"}, + {0x6B, nullptr, "Unknown"}, + {0x6C, nullptr, "Unknown"}, + {0x6D, nullptr, "Unknown"}, + {0x6E, nullptr, "Unknown"}, + {0x6f, nullptr, "GetSystemInfo32"}, + {0x70, nullptr, "CreatePort32"}, + {0x71, nullptr, "ManageNamedPort32"}, + {0x72, nullptr, "ConnectToPort32"}, + {0x73, nullptr, "SetProcessMemoryPermission32"}, + {0x74, nullptr, "Unknown"}, + {0x75, nullptr, "Unknown"}, + {0x76, nullptr, "Unknown"}, + {0x77, nullptr, "MapProcessCodeMemory32"}, + {0x78, nullptr, "UnmapProcessCodeMemory32"}, + {0x79, nullptr, "Unknown"}, + {0x7A, nullptr, "Unknown"}, + {0x7B, nullptr, "TerminateProcess32"}, +}; + +static const FunctionDef SVC_Table_64[] = { + {0x00, nullptr, "Unknown"}, + {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"}, + {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"}, + {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"}, + {0x04, SvcWrap64<MapMemory>, "MapMemory"}, + {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"}, + {0x06, SvcWrap64<QueryMemory>, "QueryMemory"}, + {0x07, SvcWrap64<ExitProcess>, "ExitProcess"}, + {0x08, SvcWrap64<CreateThread>, "CreateThread"}, + {0x09, SvcWrap64<StartThread>, "StartThread"}, + {0x0A, SvcWrap64<ExitThread>, "ExitThread"}, + {0x0B, SvcWrap64<SleepThread>, "SleepThread"}, + {0x0C, SvcWrap64<GetThreadPriority>, "GetThreadPriority"}, + {0x0D, SvcWrap64<SetThreadPriority>, "SetThreadPriority"}, + {0x0E, SvcWrap64<GetThreadCoreMask>, "GetThreadCoreMask"}, + {0x0F, SvcWrap64<SetThreadCoreMask>, "SetThreadCoreMask"}, + {0x10, SvcWrap64<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, + {0x11, SvcWrap64<SignalEvent>, "SignalEvent"}, + {0x12, SvcWrap64<ClearEvent>, "ClearEvent"}, + {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"}, + {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"}, + {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"}, + {0x16, SvcWrap64<CloseHandle>, "CloseHandle"}, + {0x17, SvcWrap64<ResetSignal>, "ResetSignal"}, + {0x18, SvcWrap64<WaitSynchronization>, "WaitSynchronization"}, + {0x19, SvcWrap64<CancelSynchronization>, "CancelSynchronization"}, + {0x1A, SvcWrap64<ArbitrateLock>, "ArbitrateLock"}, + {0x1B, SvcWrap64<ArbitrateUnlock>, "ArbitrateUnlock"}, + {0x1C, SvcWrap64<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, + {0x1D, SvcWrap64<SignalProcessWideKey>, "SignalProcessWideKey"}, + {0x1E, SvcWrap64<GetSystemTick>, "GetSystemTick"}, + {0x1F, SvcWrap64<ConnectToNamedPort>, "ConnectToNamedPort"}, {0x20, nullptr, "SendSyncRequestLight"}, - {0x21, SvcWrap<SendSyncRequest>, "SendSyncRequest"}, + {0x21, SvcWrap64<SendSyncRequest>, "SendSyncRequest"}, {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, - {0x24, SvcWrap<GetProcessId>, "GetProcessId"}, - {0x25, SvcWrap<GetThreadId>, "GetThreadId"}, - {0x26, SvcWrap<Break>, "Break"}, - {0x27, SvcWrap<OutputDebugString>, "OutputDebugString"}, + {0x24, SvcWrap64<GetProcessId>, "GetProcessId"}, + {0x25, SvcWrap64<GetThreadId>, "GetThreadId"}, + {0x26, SvcWrap64<Break>, "Break"}, + {0x27, SvcWrap64<OutputDebugString>, "OutputDebugString"}, {0x28, nullptr, "ReturnFromException"}, - {0x29, SvcWrap<GetInfo>, "GetInfo"}, + {0x29, SvcWrap64<GetInfo>, "GetInfo"}, {0x2A, nullptr, "FlushEntireDataCache"}, {0x2B, nullptr, "FlushDataCache"}, - {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, - {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, + {0x2C, SvcWrap64<MapPhysicalMemory>, "MapPhysicalMemory"}, + {0x2D, SvcWrap64<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, {0x2E, nullptr, "GetFutureThreadInfo"}, {0x2F, nullptr, "GetLastThreadInfo"}, - {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, - {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, - {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, - {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, - {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, - {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, + {0x30, SvcWrap64<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, + {0x31, SvcWrap64<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, + {0x32, SvcWrap64<SetThreadActivity>, "SetThreadActivity"}, + {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"}, + {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"}, + {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"}, {0x36, nullptr, "SynchronizePreemptionState"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, {0x39, nullptr, "Unknown"}, {0x3A, nullptr, "Unknown"}, {0x3B, nullptr, "Unknown"}, - {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, - {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, + {0x3C, SvcWrap64<KernelDebug>, "KernelDebug"}, + {0x3D, SvcWrap64<ChangeKernelTraceState>, "ChangeKernelTraceState"}, {0x3E, nullptr, "Unknown"}, {0x3F, nullptr, "Unknown"}, {0x40, nullptr, "CreateSession"}, @@ -2387,7 +2577,7 @@ static const FunctionDef SVC_Table[] = { {0x42, nullptr, "ReplyAndReceiveLight"}, {0x43, nullptr, "ReplyAndReceive"}, {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, - {0x45, SvcWrap<CreateEvent>, "CreateEvent"}, + {0x45, SvcWrap64<CreateEvent>, "CreateEvent"}, {0x46, nullptr, "Unknown"}, {0x47, nullptr, "Unknown"}, {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, @@ -2398,9 +2588,9 @@ static const FunctionDef SVC_Table[] = { {0x4D, nullptr, "SleepSystem"}, {0x4E, nullptr, "ReadWriteRegister"}, {0x4F, nullptr, "SetProcessActivity"}, - {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, - {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"}, - {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"}, + {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"}, + {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"}, + {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"}, {0x53, nullptr, "CreateInterruptEvent"}, {0x54, nullptr, "QueryPhysicalAddress"}, {0x55, nullptr, "QueryIoMapping"}, @@ -2419,8 +2609,8 @@ static const FunctionDef SVC_Table[] = { {0x62, nullptr, "TerminateDebugProcess"}, {0x63, nullptr, "GetDebugEvent"}, {0x64, nullptr, "ContinueDebugEvent"}, - {0x65, SvcWrap<GetProcessList>, "GetProcessList"}, - {0x66, SvcWrap<GetThreadList>, "GetThreadList"}, + {0x65, SvcWrap64<GetProcessList>, "GetProcessList"}, + {0x66, SvcWrap64<GetThreadList>, "GetThreadList"}, {0x67, nullptr, "GetDebugThreadContext"}, {0x68, nullptr, "SetDebugThreadContext"}, {0x69, nullptr, "QueryDebugProcessMemory"}, @@ -2436,24 +2626,32 @@ static const FunctionDef SVC_Table[] = { {0x73, nullptr, "SetProcessMemoryPermission"}, {0x74, nullptr, "MapProcessMemory"}, {0x75, nullptr, "UnmapProcessMemory"}, - {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, - {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, - {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, + {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"}, + {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"}, + {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, {0x79, nullptr, "CreateProcess"}, {0x7A, nullptr, "StartProcess"}, {0x7B, nullptr, "TerminateProcess"}, - {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, - {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, - {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, + {0x7C, SvcWrap64<GetProcessInfo>, "GetProcessInfo"}, + {0x7D, SvcWrap64<CreateResourceLimit>, "CreateResourceLimit"}, + {0x7E, SvcWrap64<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, {0x7F, nullptr, "CallSecureMonitor"}, }; -static const FunctionDef* GetSVCInfo(u32 func_num) { - if (func_num >= std::size(SVC_Table)) { +static const FunctionDef* GetSVCInfo32(u32 func_num) { + if (func_num >= std::size(SVC_Table_32)) { + LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); + return nullptr; + } + return &SVC_Table_32[func_num]; +} + +static const FunctionDef* GetSVCInfo64(u32 func_num) { + if (func_num >= std::size(SVC_Table_64)) { LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); return nullptr; } - return &SVC_Table[func_num]; + return &SVC_Table_64[func_num]; } MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); @@ -2464,7 +2662,8 @@ void CallSVC(Core::System& system, u32 immediate) { // Lock the global kernel mutex when we enter the kernel HLE. std::lock_guard lock{HLE::g_hle_lock}; - const FunctionDef* info = GetSVCInfo(immediate); + const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate) + : GetSVCInfo32(immediate); if (info) { if (info->func) { info->func(system); diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 29a2cfa9d..7d735e3fa 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -15,6 +15,10 @@ static inline u64 Param(const Core::System& system, int n) { return system.CurrentArmInterface().GetReg(n); } +static inline u32 Param32(const Core::System& system, int n) { + return static_cast<u32>(system.CurrentArmInterface().GetReg(n)); +} + /** * HLE a function return from the current ARM userland process * @param system System context @@ -24,40 +28,44 @@ static inline void FuncReturn(Core::System& system, u64 result) { system.CurrentArmInterface().SetReg(0, result); } +static inline void FuncReturn32(Core::System& system, u32 result) { + system.CurrentArmInterface().SetReg(0, (u64)result); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type ResultCode template <ResultCode func(Core::System&, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0)).raw); } template <ResultCode func(Core::System&, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); } template <ResultCode func(Core::System&, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); } template <ResultCode func(Core::System&, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); } template <ResultCode func(Core::System&, u32, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), Param(system, 3)) .raw); } template <ResultCode func(Core::System&, u32*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param = 0; const u32 retval = func(system, ¶m).raw; system.CurrentArmInterface().SetReg(1, param); @@ -65,7 +73,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1))).raw; system.CurrentArmInterface().SetReg(1, param_1); @@ -73,7 +81,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u32*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; u32 param_2 = 0; const u32 retval = func(system, ¶m_1, ¶m_2).raw; @@ -86,7 +94,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1)).raw; system.CurrentArmInterface().SetReg(1, param_1); @@ -94,7 +102,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; @@ -104,7 +112,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64*, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1))).raw; @@ -113,12 +121,12 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); } template <ResultCode func(Core::System&, u64*, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1)).raw; @@ -127,7 +135,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64*, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, ¶m_1, static_cast<u32>(Param(system, 1)), static_cast<u32>(Param(system, 2))) @@ -138,19 +146,19 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); } template <ResultCode func(Core::System&, u32, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), Param(system, 2)) .raw); } template <ResultCode func(Core::System&, u32, u32*, u64*)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; u64 param_2 = 0; const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), ¶m_1, ¶m_2); @@ -161,54 +169,54 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u64, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64, u64, u32, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), Param(system, 3)) .raw); } template <ResultCode func(Core::System&, u32, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), static_cast<u32>(Param(system, 2))) .raw); } template <ResultCode func(Core::System&, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); } template <ResultCode func(Core::System&, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); } template <ResultCode func(Core::System&, u32, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), static_cast<u32>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u32, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn( system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); } template <ResultCode func(Core::System&, u32*, u64, u64, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) @@ -219,14 +227,14 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u64, u32, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64*, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u64 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; @@ -236,7 +244,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), Param(system, 2), Param(system, 3), static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) @@ -247,7 +255,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u32*, u64, u64, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), Param(system, 2), static_cast<u32>(Param(system, 3))) @@ -258,7 +266,7 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { u32 param_1 = 0; const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) @@ -269,14 +277,14 @@ void SvcWrap(Core::System& system) { } template <ResultCode func(Core::System&, u64, u32, s32, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw); } template <ResultCode func(Core::System&, u64, u32, s32, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) .raw); @@ -286,7 +294,7 @@ void SvcWrap(Core::System& system) { // Function wrappers that return type u32 template <u32 func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system)); } @@ -294,7 +302,7 @@ void SvcWrap(Core::System& system) { // Function wrappers that return type u64 template <u64 func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { FuncReturn(system, func(system)); } @@ -302,44 +310,110 @@ void SvcWrap(Core::System& system) { /// Function wrappers that return type void template <void func(Core::System&)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system); } template <void func(Core::System&, u32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0))); } template <void func(Core::System&, u32, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), Param(system, 3)); } template <void func(Core::System&, s64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<s64>(Param(system, 0))); } template <void func(Core::System&, u64, s32)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); } template <void func(Core::System&, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), Param(system, 1)); } template <void func(Core::System&, u64, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, Param(system, 0), Param(system, 1), Param(system, 2)); } template <void func(Core::System&, u32, u64, u64)> -void SvcWrap(Core::System& system) { +void SvcWrap64(Core::System& system) { func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); } +// Used by QueryMemory32 +template <ResultCode func(Core::System&, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + FuncReturn32(system, + func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw); +} + +// Used by GetInfo32 +template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + + const u32 retval = func(system, ¶m_1, ¶m_2, Param32(system, 0), Param32(system, 1), + Param32(system, 2), Param32(system, 3)) + .raw; + + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval); +} + +// Used by GetThreadPriority32, ConnectToNamedPort32 +template <ResultCode func(Core::System&, u32*, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + const u32 retval = func(system, ¶m_1, Param32(system, 1)).raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); +} + +// Used by GetThreadId32 +template <ResultCode func(Core::System&, u32*, u32*, u32)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + u32 param_2 = 0; + + const u32 retval = func(system, ¶m_1, ¶m_2, Param32(system, 1)).raw; + system.CurrentArmInterface().SetReg(1, param_1); + system.CurrentArmInterface().SetReg(2, param_2); + FuncReturn(system, retval); +} + +// Used by SignalProcessWideKey32 +template <void func(Core::System&, u32, s32)> +void SvcWrap32(Core::System& system) { + func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1))); +} + +// Used by SendSyncRequest32 +template <ResultCode func(Core::System&, u32)> +void SvcWrap32(Core::System& system) { + FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); +} + +// Used by WaitSynchronization32 +template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)> +void SvcWrap32(Core::System& system) { + u32 param_1 = 0; + const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2), + Param32(system, 3), ¶m_1) + .raw; + system.CurrentArmInterface().SetReg(1, param_1); + FuncReturn(system, retval); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index bf850e0b2..83e956036 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,15 +133,16 @@ void Thread::CancelWait() { ResumeFromWait(); } -/** - * Resets a thread context, making it ready to be scheduled and run by the CPU - * @param context Thread context to reset - * @param stack_top Address of the top of the stack - * @param entry_point Address of entry point for execution - * @param arg User argument for thread - */ -static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, - VAddr entry_point, u64 arg) { +static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, + u32 entry_point, u32 arg) { + context = {}; + context.cpu_registers[0] = arg; + context.cpu_registers[15] = entry_point; + context.cpu_registers[13] = stack_top; +} + +static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top, + VAddr entry_point, u64 arg) { context = {}; context.cpu_registers[0] = arg; context.pc = entry_point; @@ -198,9 +199,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin thread->owner_process->RegisterThread(thread.get()); - // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used - // to initialize the context - ResetThreadContext(thread->context, stack_top, entry_point, arg); + ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), + static_cast<u32>(entry_point), static_cast<u32>(arg)); + ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); } @@ -213,11 +214,13 @@ void Thread::SetPriority(u32 priority) { } void Thread::SetWaitSynchronizationResult(ResultCode result) { - context.cpu_registers[0] = result.raw; + context_32.cpu_registers[0] = result.raw; + context_64.cpu_registers[0] = result.raw; } void Thread::SetWaitSynchronizationOutput(s32 output) { - context.cpu_registers[1] = output; + context_32.cpu_registers[1] = output; + context_64.cpu_registers[1] = output; } s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 129e7858a..23fdef8a4 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -102,7 +102,8 @@ public: using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; - using ThreadContext = Core::ARM_Interface::ThreadContext; + using ThreadContext32 = Core::ARM_Interface::ThreadContext32; + using ThreadContext64 = Core::ARM_Interface::ThreadContext64; using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; @@ -273,12 +274,20 @@ public: return status == ThreadStatus::WaitSynch; } - ThreadContext& GetContext() { - return context; + ThreadContext32& GetContext32() { + return context_32; } - const ThreadContext& GetContext() const { - return context; + const ThreadContext32& GetContext32() const { + return context_32; + } + + ThreadContext64& GetContext64() { + return context_64; + } + + const ThreadContext64& GetContext64() const { + return context_64; } ThreadStatus GetStatus() const { @@ -466,7 +475,8 @@ private: void AdjustSchedulingOnPriority(u32 old_priority); void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); - Core::ARM_Interface::ThreadContext context{}; + ThreadContext32 context_32{}; + ThreadContext64 context_64{}; u64 thread_id = 0; diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index cc978713b..d1bf13c89 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -607,7 +607,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system, {40, nullptr, "GetCradleFwVersion"}, {50, nullptr, "IsVrModeEnabled"}, {51, nullptr, "SetVrModeEnabled"}, - {52, nullptr, "SwitchLcdBacklight"}, + {52, &ICommonStateGetter::SetLcdBacklighOffEnabled, "SetLcdBacklighOffEnabled"}, {53, nullptr, "BeginVrModeEx"}, {54, nullptr, "EndVrModeEx"}, {55, nullptr, "IsInControllerFirmwareUpdateSection"}, @@ -636,7 +636,6 @@ void ICommonStateGetter::GetBootMode(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u8>(static_cast<u8>(Service::PM::SystemBootMode::Normal)); // Normal boot mode } @@ -660,6 +659,7 @@ void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { rb.PushEnum<AppletMessageQueue::AppletMessage>(message); return; } + rb.Push(RESULT_SUCCESS); rb.PushEnum<AppletMessageQueue::AppletMessage>(message); } @@ -672,6 +672,17 @@ void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) { rb.Push(static_cast<u8>(FocusState::InFocus)); } +void ICommonStateGetter::SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto is_lcd_backlight_off_enabled = rp.Pop<bool>(); + + LOG_WARNING(Service_AM, "(STUBBED) called. is_lcd_backlight_off_enabled={}", + is_lcd_backlight_off_enabled); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 0b9a4332d..0843de781 100644 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -182,6 +182,7 @@ private: void GetOperationMode(Kernel::HLERequestContext& ctx); void GetPerformanceMode(Kernel::HLERequestContext& ctx); void GetBootMode(Kernel::HLERequestContext& ctx); + void SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx); void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); void SetCpuBoostMode(Kernel::HLERequestContext& ctx); diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index d19c3623c..53559e8b1 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -129,12 +129,6 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; - if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || - arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { - return {ResultStatus::Error32BitISA, {}}; - } - if (process.LoadFromMetadata(metadata).IsError()) { return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index f95eee3b1..85ac81ef7 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -111,7 +111,7 @@ json GetProcessorStateDataAuto(Core::System& system) { const auto& vm_manager{process->VMManager()}; auto& arm{system.CurrentArmInterface()}; - Core::ARM_Interface::ThreadContext context{}; + Core::ARM_Interface::ThreadContext64 context{}; arm.SaveContext(context); return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", diff --git a/src/core/settings.cpp b/src/core/settings.cpp index d1fc94060..7c0303684 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -94,6 +94,7 @@ void LogSettings() { LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); LogSetting("Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation); + LogSetting("Renderer_UseVsync", Settings::values.use_vsync); LogSetting("Audio_OutputEngine", Settings::values.sink_id); LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); diff --git a/src/core/settings.h b/src/core/settings.h index f837d3fbc..15b691342 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -435,6 +435,7 @@ struct Values { bool use_disk_shader_cache; bool use_accurate_gpu_emulation; bool use_asynchronous_gpu_emulation; + bool use_vsync; bool force_30fps_mode; float bg_red; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 0e72d31cd..0f3685d1c 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -188,6 +188,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { Settings::values.use_accurate_gpu_emulation); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation); + AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 2228571a6..e82ae7ef1 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -32,8 +32,16 @@ public: SocketCallback callback) : callback(std::move(callback)), timer(io_service), socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), - pad_index(pad_index), - send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} + pad_index(pad_index) { + boost::system::error_code ec{}; + auto ipv4 = boost::asio::ip::make_address_v4(host, ec); + if (ec.failed()) { + LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host); + ipv4 = boost::asio::ip::address_v4{}; + } + + send_endpoint = {udp::endpoint(ipv4, port)}; + } void Stop() { io_service.stop(); @@ -85,17 +93,18 @@ private: } void HandleSend(const boost::system::error_code& error) { + boost::system::error_code _ignored{}; // Send a request for getting port info for the pad Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; const auto port_message = Request::Create(port_info, client_id); std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); - socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); + socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint, {}, _ignored); // Send a request for getting pad data for the pad Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; const auto pad_message = Request::Create(pad_data, client_id); std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); - socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); + socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint, {}, _ignored); StartSend(timer.expiry()); } diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp index a982ac49d..5e50bd612 100644 --- a/src/input_common/udp/protocol.cpp +++ b/src/input_common/udp/protocol.cpp @@ -31,7 +31,6 @@ namespace Response { */ std::optional<Type> Validate(u8* data, std::size_t size) { if (size < sizeof(Header)) { - LOG_DEBUG(Input, "Invalid UDP packet received"); return std::nullopt; } Header header{}; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2cdf1aa7f..b1088af3d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/frontend/scope_acquire_window_context.h" +#include "core/frontend/scope_acquire_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p return; } - Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; CommandDataContainer next; while (state.is_running) { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index af1bebc4f..5ec99a126 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -35,15 +35,19 @@ public: explicit RendererBase(Core::Frontend::EmuWindow& window); virtual ~RendererBase(); - /// Swap buffers (render frame) - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; - /// Initialize the renderer virtual bool Init() = 0; /// Shutdown the renderer virtual void ShutDown() = 0; + /// Finalize rendering the guest frame and draw into the presentation texture + virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + + /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer + /// specific implementation) + virtual void TryPresent(int timeout_ms) = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e1965fb21..3fcd319fd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,6 +36,7 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -56,8 +57,7 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type, - std::size_t index = 0) { + ShaderType shader_type, std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); @@ -910,15 +910,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - if (!entry.IsIndexed()) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + const auto shader_type = static_cast<ShaderType>(stage_index); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); - } } } } @@ -928,16 +923,9 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - if (!entry.IsIndexed()) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); - } } } } diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index f0ddfb276..c0aee770f 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -15,6 +15,24 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R namespace OpenGL { +void OGLRenderbuffer::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenRenderbuffers(1, &handle); +} + +void OGLRenderbuffer::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteRenderbuffers(1, &handle); + OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply(); + handle = 0; +} + void OGLTexture::Create(GLenum target) { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 514d1d165..995a4e45e 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -11,6 +11,31 @@ namespace OpenGL { +class OGLRenderbuffer : private NonCopyable { +public: + OGLRenderbuffer() = default; + + OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLRenderbuffer() { + Release(); + } + + OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + class OGLTexture : private NonCopyable { public: OGLTexture() = default; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ab1f7983c..7d3bc1a1f 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -423,6 +423,13 @@ void OpenGLState::ApplyClipControl() { } } +void OpenGLState::ApplyRenderBuffer() { + if (cur_state.renderbuffer != renderbuffer) { + cur_state.renderbuffer = renderbuffer; + glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); + } +} + void OpenGLState::ApplyTextures() { const std::size_t size = std::size(textures); for (std::size_t i = 0; i < size; ++i) { @@ -478,6 +485,7 @@ void OpenGLState::Apply() { ApplyPolygonOffset(); ApplyAlphaTest(); ApplyClipControl(); + ApplyRenderBuffer(); } void OpenGLState::EmulateViewportWithScissor() { @@ -551,4 +559,11 @@ OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { return *this; } +OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) { + if (renderbuffer == handle) { + renderbuffer = 0; + } + return *this; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 4953eeda2..bce662f2c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -158,6 +158,8 @@ public: GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; } clip_control; + GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING + OpenGLState(); /// Get the currently active OpenGL state @@ -196,6 +198,7 @@ public: void ApplyPolygonOffset(); void ApplyAlphaTest(); void ApplyClipControl(); + void ApplyRenderBuffer(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); @@ -204,6 +207,7 @@ public: OpenGLState& ResetPipeline(GLuint handle); OpenGLState& ResetVertexArray(GLuint handle); OpenGLState& ResetFramebuffer(GLuint handle); + OpenGLState& ResetRenderbuffer(GLuint handle); /// Viewport does not affects glClearBuffer so emulate viewport using scissor test void EmulateViewportWithScissor(); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7ed505628..d3dea3659 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_UNSIGNED_SHORT; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::SignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: + case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_SHORT; default: LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bba16afaf..a4340b502 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -9,11 +9,11 @@ #include <glad/glad.h> #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/telemetry.h" #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" -#include "core/frontend/scope_acquire_window_context.h" #include "core/memory.h" #include "core/perf_stats.h" #include "core/settings.h" @@ -24,6 +24,144 @@ namespace OpenGL { +// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have +// to wait on available presentation frames. +constexpr std::size_t SWAP_CHAIN_SIZE = 3; + +struct Frame { + u32 width{}; /// Width of the frame (to detect resize) + u32 height{}; /// Height of the frame + bool color_reloaded{}; /// Texture attachment was recreated (ie: resized) + OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO + OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread + OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread + GLsync render_fence{}; /// Fence created on the render thread + GLsync present_fence{}; /// Fence created on the presentation thread + bool is_srgb{}; /// Framebuffer is sRGB or RGB +}; + +/** + * For smooth Vsync rendering, we want to always present the latest frame that the core generates, + * but also make sure that rendering happens at the pace that the frontend dictates. This is a + * helper class that the renderer uses to sync frames between the render thread and the presentation + * thread + */ +class FrameMailbox { +public: + std::mutex swap_chain_lock; + std::condition_variable present_cv; + std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; + std::queue<Frame*> free_queue; + std::deque<Frame*> present_queue; + Frame* previous_frame{}; + + FrameMailbox() { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } + } + + ~FrameMailbox() { + // lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock{swap_chain_lock}; + std::queue<Frame*>().swap(free_queue); + present_queue.clear(); + present_cv.notify_all(); + } + + void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; + } + + void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { + OpenGLState prev_state = OpenGLState::GetCurState(); + OpenGLState state = OpenGLState::GetCurState(); + + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + state.renderbuffer = frame->color.handle; + state.Apply(); + glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + state.draw.read_framebuffer = frame->render.handle; + state.draw.draw_framebuffer = frame->render.handle; + state.Apply(); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + prev_state.Apply(); + frame->width = width; + frame->height = height; + frame->color_reloaded = true; + } + + Frame* GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; + } + + void ReleaseRenderFrame(Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + } + + Frame* TryGetPresentFrame(int timeout_ms) { + std::unique_lock lock{swap_chain_lock}; + // wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + // free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + } + + // the newest entries are pushed to the front of the queue + Frame* frame = present_queue.front(); + present_queue.pop_front(); + // remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; + return frame; + } +}; + namespace { constexpr char vertex_shader[] = R"( @@ -158,21 +296,91 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) - : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, + frame_mailbox{std::make_unique<FrameMailbox>()} {} RendererOpenGL::~RendererOpenGL() = default; +MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); +MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); + void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + render_window.PollEvents(); + + if (!framebuffer) { + return; + } + // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); state.Apply(); + PrepareRendertarget(framebuffer); + RenderScreenshot(); + + Frame* frame; + { + MICROPROFILE_SCOPE(OpenGL_WaitPresent); + + frame = frame_mailbox->GetRenderFrame(); + + // Clean up sync objects before drawing + + // INTEL driver workaround. We can't delete the previous render sync object until we are + // sure that the presentation is done + if (frame->present_fence) { + glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + } + + // delete the draw fence if the frame wasn't presented + if (frame->render_fence) { + glDeleteSync(frame->render_fence); + frame->render_fence = 0; + } + + // wait for the presentation to be done + if (frame->present_fence) { + glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(frame->present_fence); + frame->present_fence = 0; + } + } + + { + MICROPROFILE_SCOPE(OpenGL_RenderFrame); + const auto& layout = render_window.GetFramebufferLayout(); + + // Recreate the frame if the size of the window has changed + if (layout.width != frame->width || layout.height != frame->height || + screen_info.display_srgb != frame->is_srgb) { + LOG_DEBUG(Render_OpenGL, "Reloading render frame"); + frame->is_srgb = screen_info.display_srgb; + frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); + } + state.draw.draw_framebuffer = frame->render.handle; + state.Apply(); + DrawScreen(layout); + // Create a fence for the frontend to wait on and swap this frame to OffTex + frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + frame_mailbox->ReleaseRenderFrame(frame); + m_current_frame++; + rasterizer->TickFrame(); + } + + // Restore the rasterizer state + prev_state.AllDirty(); + prev_state.Apply(); +} + +void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { if (framebuffer) { // If framebuffer is provided, reload it from memory to a texture if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format) { + screen_info.texture.pixel_format != framebuffer->pixel_format || + gl_framebuffer_data.empty()) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. @@ -181,22 +389,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { // Load the framebuffer from memory, draw it to the screen, and swap buffers LoadFBToScreenInfo(*framebuffer); - - if (renderer_settings.screenshot_requested) - CaptureScreenshot(); - - DrawScreen(render_window.GetFramebufferLayout()); - - rasterizer->TickFrame(); - - render_window.SwapBuffers(); } - - render_window.PollEvents(); - - // Restore the rasterizer state - prev_state.AllDirty(); - prev_state.Apply(); } void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { @@ -418,13 +611,48 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { DrawScreenTriangles(screen_info, static_cast<float>(screen.left), static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), static_cast<float>(screen.GetHeight())); +} - m_current_frame++; +void RendererOpenGL::TryPresent(int timeout_ms) { + const auto& layout = render_window.GetFramebufferLayout(); + auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); + if (!frame) { + LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); + return; + } + + // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a + // readback since we won't be doing any blending + glClear(GL_COLOR_BUFFER_BIT); + + // Recreate the presentation FBO if the color attachment was changed + if (frame->color_reloaded) { + LOG_DEBUG(Render_OpenGL, "Reloading present frame"); + frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height); + } + glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED); + // INTEL workaround. + // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete + // it on the emulation thread without too much penalty + // glDeleteSync(frame.render_sync); + // frame.render_sync = 0; + + glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle); + glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + + // Insert fence for the main thread to block on + frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } -void RendererOpenGL::UpdateFramerate() {} +void RendererOpenGL::RenderScreenshot() { + if (!renderer_settings.screenshot_requested) { + return; + } -void RendererOpenGL::CaptureScreenshot() { // Draw the current frame to the screenshot framebuffer screenshot_framebuffer.Create(); GLuint old_read_fb = state.draw.read_framebuffer; @@ -459,8 +687,6 @@ void RendererOpenGL::CaptureScreenshot() { } bool RendererOpenGL::Init() { - Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b56328a7f..d45e69cbc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -44,19 +44,23 @@ struct ScreenInfo { TextureInfo texture; }; +struct PresentationTexture { + u32 width = 0; + u32 height = 0; + OGLTexture texture; +}; + +class FrameMailbox; + class RendererOpenGL final : public VideoCore::RendererBase { public: explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - - /// Initialize the renderer bool Init() override; - - /// Shutdown the renderer void ShutDown() override; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -74,10 +78,7 @@ private: void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); - /// Updates the framerate. - void UpdateFramerate(); - - void CaptureScreenshot(); + void RenderScreenshot(); /// Loads framebuffer from emulated memory into the active OpenGL texture. void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); @@ -87,6 +88,8 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + Core::Frontend::EmuWindow& emu_window; Core::System& system; @@ -107,6 +110,9 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; Common::Rectangle<int> framebuffer_crop_rect; + + /// Frame presentation mailbox + std::unique_ptr<FrameMailbox> frame_mailbox; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index ef66dd141..aad0c895b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -371,8 +371,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Uscaled; case Maxwell::VertexAttribute::Size::Size_8_8: return vk::Format::eR8G8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Uscaled; + case Maxwell::VertexAttribute::Size::Size_16: + return vk::Format::eR16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Uscaled; default: break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d5032b432..ddc62bc97 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -106,8 +106,14 @@ RendererVulkan::~RendererVulkan() { } void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + render_window.PollEvents(); + + if (!framebuffer) { + return; + } + const auto& layout = render_window.GetFramebufferLayout(); - if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { + if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; const bool use_accelerated = rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); @@ -128,13 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen->Recreate(); } - render_window.SwapBuffers(); rasterizer->TickFrame(); } render_window.PollEvents(); } +void RendererVulkan::TryPresent(int /*timeout_ms*/) { + // TODO (bunnei): ImplementMe +} + bool RendererVulkan::Init() { PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); @@ -262,4 +271,4 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -} // namespace Vulkan
\ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a472c5dc9..f513397f0 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -36,14 +36,10 @@ public: explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); ~RendererVulkan() override; - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - - /// Initialize the renderer bool Init() override; - - /// Shutdown the renderer void ShutDown() override; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void TryPresent(int timeout_ms) override; private: std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9d5b8de7a..60f57d83e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; u32 binding = 0; u32 offset = 0; - FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); + FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. return UniqueDescriptorUpdateTemplate{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b155dfb49..6a02403c1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat u32 offset = 0; for (const auto& stage : program) { if (stage) { - FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, - template_entries); + FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); } } if (template_entries.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7ddf7d3ee..696e4b291 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType; namespace { +// C++20's using enum +constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; +constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; +constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; +constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; +constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; + constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ VideoCommon::Shader::CompileDepth::FullDecompile}; @@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { } } +template <vk::DescriptorType descriptor_type, class Container> +void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, + vk::ShaderStageFlags stage_flags, const Container& container) { + const u32 num_entries = static_cast<u32>(std::size(container)); + for (std::size_t i = 0; i < num_entries; ++i) { + u32 count = 1; + if constexpr (descriptor_type == eCombinedImageSampler) { + // Combined image samplers can be arrayed. + count = container[i].Size(); + } + bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); + } +} + u32 FillDescriptorLayout(const ShaderEntries& entries, std::vector<vk::DescriptorSetLayoutBinding>& bindings, Maxwell::ShaderProgram program_type, u32 base_binding) { const ShaderType stage = GetStageFromProgram(program_type); - const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); + const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); u32 binding = base_binding; - const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { - for (std::size_t i = 0; i < num_entries; ++i) { - bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); - } - }; - AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); + AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); + AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); + AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); + AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); + AddBindings<eStorageImage>(bindings, binding, flags, entries.images); return binding; } @@ -361,32 +377,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { return {std::move(program), std::move(bindings)}; } -void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { - static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); - const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { - const u32 count = static_cast<u32>(count_); - if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && - device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { - // Nvidia has a bug where updating multiple uniform texels at once causes the driver to - // crash. - for (u32 i = 0; i < count; ++i) { - template_entries.emplace_back(binding + i, 0, 1, descriptor_type, - offset + i * entry_size, entry_size); - } - } else if (count != 0) { - template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); +template <vk::DescriptorType descriptor_type, class Container> +void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, + u32& offset, const Container& container) { + static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); + const u32 count = static_cast<u32>(std::size(container)); + + if constexpr (descriptor_type == eCombinedImageSampler) { + for (u32 i = 0; i < count; ++i) { + const u32 num_samplers = container[i].Size(); + template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, + entry_size); + ++binding; + offset += num_samplers * entry_size; } - offset += count * entry_size; - binding += count; - }; + return; + } - AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); + if constexpr (descriptor_type == eUniformTexelBuffer) { + // Nvidia has a bug where updating multiple uniform texels at once causes the driver to + // crash. + for (u32 i = 0; i < count; ++i) { + template_entries.emplace_back(binding + i, 0, 1, descriptor_type, + offset + i * entry_size, entry_size); + } + } else if (count > 0) { + template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + } + offset += count * entry_size; + binding += count; +} + +void FillDescriptorUpdateTemplateEntries( + const ShaderEntries& entries, u32& binding, u32& offset, + std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { + AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); + AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); + AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); + AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8678fc9c3..92a670cc7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -194,7 +194,7 @@ private: }; void FillDescriptorUpdateTemplateEntries( - const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, + const ShaderEntries& entries, u32& binding, u32& offset, std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3bf86da87..3fe28c204 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -105,17 +105,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - std::size_t stage) { + std::size_t stage, std::size_t index = 0) { const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); + const u32 offset = entry.GetOffset() + entry_offset; if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(stage_type, entry.GetOffset()); + return engine.GetStageTexture(stage_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -836,8 +839,10 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, stage); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, stage, i); + SetupTexture(texture, entry); + } } } @@ -886,8 +891,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); - SetupTexture(texture, entry); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + SetupTexture(texture, entry); + } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2da622d15..cfcca5af0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -69,8 +69,9 @@ struct TexelBuffer { struct SampledImage { Id image_type{}; - Id sampled_image_type{}; - Id sampler{}; + Id sampler_type{}; + Id sampler_pointer_type{}; + Id variable{}; }; struct StorageImage { @@ -833,16 +834,20 @@ private: constexpr int sampled = 1; constexpr auto format = spv::ImageFormat::Unknown; const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampled_image_type = TypeSampledImage(image_type); - const Id pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); + const Id sampler_type = TypeSampledImage(image_type); + const Id sampler_pointer_type = + TypePointer(spv::StorageClass::UniformConstant, sampler_type); + const Id type = sampler.IsIndexed() + ? TypeArray(sampler_type, Constant(t_uint, sampler.Size())) + : sampler_type; + const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - sampled_images.emplace(sampler.GetIndex(), - SampledImage{image_type, sampled_image_type, id}); + sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type, + sampler_pointer_type, id}); } return binding; } @@ -1525,7 +1530,12 @@ private: ASSERT(!meta.sampler.IsBuffer()); const auto& entry = sampled_images.at(meta.sampler.GetIndex()); - return OpLoad(entry.sampled_image_type, entry.sampler); + Id sampler = entry.variable; + if (meta.sampler.IsIndexed()) { + const Id index = AsInt(Visit(meta.index)); + sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); + } + return OpLoad(entry.sampler_type, sampler); } Id GetTextureImage(Operation operation) { @@ -2211,16 +2221,14 @@ private: switch (specialization.attribute_types.at(location)) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: case Maxwell::VertexAttribute::Type::Float: return {Type::Float, t_in_float, t_in_float4}; case Maxwell::VertexAttribute::Type::SignedInt: return {Type::Int, t_in_int, t_in_int4}; case Maxwell::VertexAttribute::Type::UnsignedInt: return {Type::Uint, t_in_uint, t_in_uint4}; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - UNIMPLEMENTED(); - return {Type::Float, t_in_float, t_in_float4}; default: UNREACHABLE(); return {Type::Float, t_in_float, t_in_float4}; diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 90240c765..478394682 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); + static constexpr std::array FmulPostFactor = { + 1.000f, // None + 0.500f, // Divide 2 + 0.250f, // Divide 4 + 0.125f, // Divide 8 + 8.000f, // Mul 8 + 4.000f, // Mul 4 + 2.000f, // Mul 2 + }; if (instr.fmul.postfactor != 0) { - auto postfactor = static_cast<s32>(instr.fmul.postfactor); - - // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below - // logic. - if (postfactor >= 4) { - postfactor = 7 - postfactor; - } else { - postfactor = 0 - postfactor; - } - - if (postfactor > 0) { - value = Operation(OperationCode::FMul, NO_PRECISE, value, - Immediate(static_cast<f32>(1 << postfactor))); - } else { - value = Operation(OperationCode::FDiv, NO_PRECISE, value, - Immediate(static_cast<f32>(1 << -postfactor))); - } + op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, + Immediate(FmulPostFactor[instr.fmul.postfactor])); } + // TODO(Rodrigo): Should precise be used when there's a postfactor? + Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); + value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 21366869d..2fe787d6f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc) { - constexpr u32 lop_iterations = 32; - const Node one = Immediate(1); - const Node two = Immediate(2); - - Node value; - for (u32 i = 0; i < lop_iterations; ++i) { - const Node shift_amount = Immediate(i); - - const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); - const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); - - const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); - const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); - const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); - - const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); - const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); - const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); - - const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); - const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); - - const Node shifted_bit = - Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); - const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); - - const Node right = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); - - if (i > 0) { - value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); - } else { - value = right; + const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { + Node value = Immediate(0); + const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); + if (imm.GetValue() & 0x01) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node b = Operation(OperationCode::IBitwiseNot, nb); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); } - } + if (imm.GetValue() & 0x02) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node b = Operation(OperationCode::IBitwiseNot, nb); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x04) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x08) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x10) { + const Node b = Operation(OperationCode::IBitwiseNot, nb); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x20) { + const Node b = Operation(OperationCode::IBitwiseNot, nb); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x40) { + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x80) { + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + return value; + }(op_a, op_b, op_c, imm_lut); - SetInternalFlagsFromInteger(bb, value, sets_cc); - SetRegister(bb, dest, value); + SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); + SetRegister(bb, dest, lop3_fast); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a0a7b9111..a1828546e 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -299,7 +299,7 @@ private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). - u32 size{}; ///< Size of the sampler if indexed. + u32 size{1}; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index face8c943..15e22b9fa 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -157,13 +157,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return {}; } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; + s64 current_cursor = cursor; + while (current_cursor > 0) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1); + current_cursor = new_cursor; + if (!source) { + continue; + } + const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor); + if (base_address != nullptr) { + return {base_address, index, offset}; + } } - return TrackCbuf(source, code, new_cursor); + return {}; } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 55a37fffa..c3dbb1a88 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -9,6 +9,9 @@ #include <QKeyEvent> #include <QMessageBox> #include <QOffscreenSurface> +#include <QOpenGLContext> +#include <QOpenGLFunctions> +#include <QOpenGLFunctions_4_3_Core> #include <QOpenGLWindow> #include <QPainter> #include <QScreen> @@ -23,9 +26,10 @@ #include "common/assert.h" #include "common/microprofile.h" #include "common/scm_rev.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "core/frontend/scope_acquire_window_context.h" +#include "core/frontend/scope_acquire_context.h" #include "core/settings.h" #include "input_common/keyboard.h" #include "input_common/main.h" @@ -35,15 +39,27 @@ #include "yuzu/bootmanager.h" #include "yuzu/main.h" -EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} +EmuThread::EmuThread(GRenderWindow& window) + : shared_context{window.CreateSharedContext()}, + context{(Settings::values.use_asynchronous_gpu_emulation && shared_context) ? *shared_context + : window} {} EmuThread::~EmuThread() = default; -void EmuThread::run() { - render_window->MakeCurrent(); +static GMainWindow* GetMainWindow() { + for (QWidget* w : qApp->topLevelWidgets()) { + if (GMainWindow* main = qobject_cast<GMainWindow*>(w)) { + return main; + } + } + return nullptr; +} +void EmuThread::run() { MicroProfileOnThreadCreate("EmuThread"); + Core::Frontend::ScopeAcquireContext acquire_context{context}; + emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( @@ -53,11 +69,6 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); - if (Settings::values.use_asynchronous_gpu_emulation) { - // Release OpenGL context for the GPU thread - render_window->DoneCurrent(); - } - // Holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step @@ -98,190 +109,202 @@ void EmuThread::run() { #if MICROPROFILE_ENABLED MicroProfileOnThreadExit(); #endif - - render_window->moveContext(); } class GGLContext : public Core::Frontend::GraphicsContext { public: - explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} { - context.setFormat(shared_context->format()); - context.setShareContext(shared_context); - context.create(); + explicit GGLContext(QOpenGLContext* shared_context) + : context(new QOpenGLContext(shared_context->parent())), + surface(new QOffscreenSurface(nullptr)) { + + // disable vsync for any shared contexts + auto format = shared_context->format(); + format.setSwapInterval(0); + + context->setShareContext(shared_context); + context->setFormat(format); + context->create(); + surface->setParent(shared_context->parent()); + surface->setFormat(format); + surface->create(); } void MakeCurrent() override { - context.makeCurrent(shared_context->surface()); + context->makeCurrent(surface); } void DoneCurrent() override { - context.doneCurrent(); + context->doneCurrent(); } - void SwapBuffers() override {} - private: - QOpenGLContext* shared_context; - QOpenGLContext context; + QOpenGLContext* context; + QOffscreenSurface* surface; }; -class GWidgetInternal : public QWindow { +class ChildRenderWindow : public QWindow { public: - GWidgetInternal(GRenderWindow* parent) : parent(parent) {} - virtual ~GWidgetInternal() = default; + ChildRenderWindow(QWindow* parent, QWidget* event_handler) + : QWindow{parent}, event_handler{event_handler} {} - void resizeEvent(QResizeEvent* ev) override { - parent->OnClientAreaResized(ev->size().width(), ev->size().height()); - parent->OnFramebufferSizeChanged(); - } + virtual ~ChildRenderWindow() = default; - void keyPressEvent(QKeyEvent* event) override { - InputCommon::GetKeyboard()->PressKey(event->key()); - } + virtual void Present() = 0; - void keyReleaseEvent(QKeyEvent* event) override { - InputCommon::GetKeyboard()->ReleaseKey(event->key()); +protected: + bool event(QEvent* event) override { + switch (event->type()) { + case QEvent::UpdateRequest: + Present(); + return true; + case QEvent::MouseButtonPress: + case QEvent::MouseButtonRelease: + case QEvent::MouseButtonDblClick: + case QEvent::MouseMove: + case QEvent::KeyPress: + case QEvent::KeyRelease: + case QEvent::FocusIn: + case QEvent::FocusOut: + case QEvent::FocusAboutToChange: + case QEvent::Enter: + case QEvent::Leave: + case QEvent::Wheel: + case QEvent::TabletMove: + case QEvent::TabletPress: + case QEvent::TabletRelease: + case QEvent::TabletEnterProximity: + case QEvent::TabletLeaveProximity: + case QEvent::TouchBegin: + case QEvent::TouchUpdate: + case QEvent::TouchEnd: + case QEvent::InputMethodQuery: + case QEvent::TouchCancel: + return QCoreApplication::sendEvent(event_handler, event); + case QEvent::Drop: + GetMainWindow()->DropAction(static_cast<QDropEvent*>(event)); + return true; + case QEvent::DragResponse: + case QEvent::DragEnter: + case QEvent::DragLeave: + case QEvent::DragMove: + GetMainWindow()->AcceptDropEvent(static_cast<QDropEvent*>(event)); + return true; + default: + return QWindow::event(event); + } } - void mousePressEvent(QMouseEvent* event) override { - if (event->source() == Qt::MouseEventSynthesizedBySystem) - return; // touch input is handled in TouchBeginEvent - - const auto pos{event->pos()}; - if (event->button() == Qt::LeftButton) { - const auto [x, y] = parent->ScaleTouch(pos); - parent->TouchPressed(x, y); - } else if (event->button() == Qt::RightButton) { - InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y()); - } + void exposeEvent(QExposeEvent* event) override { + QWindow::requestUpdate(); + QWindow::exposeEvent(event); } - void mouseMoveEvent(QMouseEvent* event) override { - if (event->source() == Qt::MouseEventSynthesizedBySystem) - return; // touch input is handled in TouchUpdateEvent +private: + QWidget* event_handler{}; +}; - const auto pos{event->pos()}; - const auto [x, y] = parent->ScaleTouch(pos); - parent->TouchMoved(x, y); - InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y()); - } +class OpenGLWindow final : public ChildRenderWindow { +public: + OpenGLWindow(QWindow* parent, QWidget* event_handler, QOpenGLContext* shared_context) + : ChildRenderWindow{parent, event_handler}, + context(new QOpenGLContext(shared_context->parent())) { - void mouseReleaseEvent(QMouseEvent* event) override { - if (event->source() == Qt::MouseEventSynthesizedBySystem) - return; // touch input is handled in TouchEndEvent + // disable vsync for any shared contexts + auto format = shared_context->format(); + format.setSwapInterval(Settings::values.use_vsync ? 1 : 0); + this->setFormat(format); - if (event->button() == Qt::LeftButton) - parent->TouchReleased(); - else if (event->button() == Qt::RightButton) - InputCommon::GetMotionEmu()->EndTilt(); - } + context->setShareContext(shared_context); + context->setScreen(this->screen()); + context->setFormat(format); + context->create(); - void DisablePainting() { - do_painting = false; - } + setSurfaceType(QWindow::OpenGLSurface); - void EnablePainting() { - do_painting = true; + // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, + // WA_DontShowOnScreen, WA_DeleteOnClose } - std::pair<unsigned, unsigned> GetSize() const { - return std::make_pair(width(), height()); + ~OpenGLWindow() override { + context->doneCurrent(); } -protected: - bool IsPaintingEnabled() const { - return do_painting; + void Present() override { + if (!isExposed()) { + return; + } + + context->makeCurrent(this); + Core::System::GetInstance().Renderer().TryPresent(100); + context->swapBuffers(this); + auto f = context->versionFunctions<QOpenGLFunctions_4_3_Core>(); + f->glFinish(); + QWindow::requestUpdate(); } private: - GRenderWindow* parent; - bool do_painting = false; -}; - -// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL -// context. -// The corresponding functionality is handled in EmuThread instead -class GGLWidgetInternal final : public GWidgetInternal, public QOpenGLWindow { -public: - GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) - : GWidgetInternal(parent), QOpenGLWindow(shared_context) {} - ~GGLWidgetInternal() override = default; - - void paintEvent(QPaintEvent* ev) override { - if (IsPaintingEnabled()) { - QPainter painter(this); - } - } + QOpenGLContext* context{}; }; #ifdef HAS_VULKAN -class GVKWidgetInternal final : public GWidgetInternal { +class VulkanWindow final : public ChildRenderWindow { public: - GVKWidgetInternal(GRenderWindow* parent, QVulkanInstance* instance) : GWidgetInternal(parent) { + VulkanWindow(QWindow* parent, QWidget* event_handler, QVulkanInstance* instance) + : ChildRenderWindow{parent, event_handler} { setSurfaceType(QSurface::SurfaceType::VulkanSurface); setVulkanInstance(instance); } - ~GVKWidgetInternal() override = default; + + ~VulkanWindow() override = default; + + void Present() override { + // TODO(bunnei): ImplementMe + } + +private: + QWidget* event_handler{}; }; #endif -GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) - : QWidget(parent), emu_thread(emu_thread) { +GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread) + : QWidget(parent_), emu_thread(emu_thread) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") .arg(QString::fromUtf8(Common::g_build_name), QString::fromUtf8(Common::g_scm_branch), QString::fromUtf8(Common::g_scm_desc))); setAttribute(Qt::WA_AcceptTouchEvents); - + auto layout = new QHBoxLayout(this); + layout->setMargin(0); + setLayout(layout); InputCommon::Init(); + + GMainWindow* parent = GetMainWindow(); connect(this, &GRenderWindow::FirstFrameDisplayed, parent, &GMainWindow::OnLoadComplete); } GRenderWindow::~GRenderWindow() { InputCommon::Shutdown(); - - // Avoid an unordered destruction that generates a segfault - delete child; } -void GRenderWindow::moveContext() { - if (!context) { - return; +void GRenderWindow::MakeCurrent() { + if (core_context) { + core_context->MakeCurrent(); } - DoneCurrent(); - - // If the thread started running, move the GL Context to the new thread. Otherwise, move it - // back. - auto thread = (QThread::currentThread() == qApp->thread() && emu_thread != nullptr) - ? emu_thread - : qApp->thread(); - context->moveToThread(thread); } -void GRenderWindow::SwapBuffers() { - if (context) { - context->swapBuffers(child); +void GRenderWindow::DoneCurrent() { + if (core_context) { + core_context->DoneCurrent(); } +} + +void GRenderWindow::PollEvents() { if (!first_frame) { first_frame = true; emit FirstFrameDisplayed(); } } -void GRenderWindow::MakeCurrent() { - if (context) { - context->makeCurrent(child); - } -} - -void GRenderWindow::DoneCurrent() { - if (context) { - context->doneCurrent(); - } -} - -void GRenderWindow::PollEvents() {} - bool GRenderWindow::IsShown() const { return !isMinimized(); } @@ -291,7 +314,7 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i #ifdef HAS_VULKAN const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); const VkInstance instance_copy = vk_instance->vkInstance(); - const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child); + const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_window); std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); std::memcpy(instance, &instance_copy, sizeof(instance_copy)); @@ -309,21 +332,10 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i void GRenderWindow::OnFramebufferSizeChanged() { // Screen changes potentially incur a change in screen DPI, hence we should update the // framebuffer size - const qreal pixelRatio{GetWindowPixelRatio()}; - const auto size{child->GetSize()}; - UpdateCurrentFramebufferLayout(size.first * pixelRatio, size.second * pixelRatio); -} - -void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) { - if (child) { - child->keyPressEvent(event); - } -} - -void GRenderWindow::ForwardKeyReleaseEvent(QKeyEvent* event) { - if (child) { - child->keyReleaseEvent(event); - } + const qreal pixel_ratio = windowPixelRatio(); + const u32 width = this->width() * pixel_ratio; + const u32 height = this->height() * pixel_ratio; + UpdateCurrentFramebufferLayout(width, height); } void GRenderWindow::BackupGeometry() { @@ -351,13 +363,12 @@ QByteArray GRenderWindow::saveGeometry() { return geometry; } -qreal GRenderWindow::GetWindowPixelRatio() const { - // windowHandle() might not be accessible until the window is displayed to screen. - return windowHandle() ? windowHandle()->screen()->devicePixelRatio() : 1.0f; +qreal GRenderWindow::windowPixelRatio() const { + return devicePixelRatio(); } std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { - const qreal pixel_ratio{GetWindowPixelRatio()}; + const qreal pixel_ratio = windowPixelRatio(); return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; } @@ -367,6 +378,47 @@ void GRenderWindow::closeEvent(QCloseEvent* event) { QWidget::closeEvent(event); } +void GRenderWindow::keyPressEvent(QKeyEvent* event) { + InputCommon::GetKeyboard()->PressKey(event->key()); +} + +void GRenderWindow::keyReleaseEvent(QKeyEvent* event) { + InputCommon::GetKeyboard()->ReleaseKey(event->key()); +} + +void GRenderWindow::mousePressEvent(QMouseEvent* event) { + if (event->source() == Qt::MouseEventSynthesizedBySystem) + return; // touch input is handled in TouchBeginEvent + + auto pos = event->pos(); + if (event->button() == Qt::LeftButton) { + const auto [x, y] = ScaleTouch(pos); + this->TouchPressed(x, y); + } else if (event->button() == Qt::RightButton) { + InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y()); + } +} + +void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { + if (event->source() == Qt::MouseEventSynthesizedBySystem) + return; // touch input is handled in TouchUpdateEvent + + auto pos = event->pos(); + const auto [x, y] = ScaleTouch(pos); + this->TouchMoved(x, y); + InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y()); +} + +void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { + if (event->source() == Qt::MouseEventSynthesizedBySystem) + return; // touch input is handled in TouchEndEvent + + if (event->button() == Qt::LeftButton) + this->TouchReleased(); + else if (event->button() == Qt::RightButton) + InputCommon::GetMotionEmu()->EndTilt(); +} + void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) { // TouchBegin always has exactly one touch point, so take the .first() const auto [x, y] = ScaleTouch(event->touchPoints().first().pos()); @@ -415,26 +467,20 @@ void GRenderWindow::focusOutEvent(QFocusEvent* event) { InputCommon::GetKeyboard()->ReleaseAllKeys(); } -void GRenderWindow::OnClientAreaResized(u32 width, u32 height) { - NotifyClientAreaSizeChanged(std::make_pair(width, height)); +void GRenderWindow::resizeEvent(QResizeEvent* event) { + QWidget::resizeEvent(event); + OnFramebufferSizeChanged(); } std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { - return std::make_unique<GGLContext>(context.get()); + if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + return std::make_unique<GGLContext>(QOpenGLContext::globalShareContext()); + } + return {}; } bool GRenderWindow::InitRenderTarget() { - shared_context.reset(); - context.reset(); - if (child) { - delete child; - } - if (container) { - delete container; - } - if (layout()) { - delete layout(); - } + ReleaseRenderTarget(); first_frame = false; @@ -451,13 +497,6 @@ bool GRenderWindow::InitRenderTarget() { break; } - container = QWidget::createWindowContainer(child, this); - QBoxLayout* layout = new QHBoxLayout(this); - - layout->addWidget(container); - layout->setMargin(0); - setLayout(layout); - // Reset minimum required size to avoid resizing issues on the main window after restarting. setMinimumSize(1, 1); @@ -467,14 +506,9 @@ bool GRenderWindow::InitRenderTarget() { hide(); resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); - child->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); - container->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); - OnFramebufferSizeChanged(); - NotifyClientAreaSizeChanged(child->GetSize()); - BackupGeometry(); if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { @@ -486,6 +520,14 @@ bool GRenderWindow::InitRenderTarget() { return true; } +void GRenderWindow::ReleaseRenderTarget() { + if (child_widget) { + layout()->removeWidget(child_widget); + delete child_widget; + child_widget = nullptr; + } +} + void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { auto& renderer = Core::System::GetInstance().Renderer(); @@ -521,16 +563,19 @@ bool GRenderWindow::InitializeOpenGL() { fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); // TODO: expose a setting for buffer value (ie default/single/double/triple) fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); - shared_context = std::make_unique<QOpenGLContext>(); - shared_context->setFormat(fmt); - shared_context->create(); - context = std::make_unique<QOpenGLContext>(); - context->setShareContext(shared_context.get()); - context->setFormat(fmt); - context->create(); - fmt.setSwapInterval(false); - - child = new GGLWidgetInternal(this, shared_context.get()); + fmt.setSwapInterval(0); + QSurfaceFormat::setDefaultFormat(fmt); + + GMainWindow* parent = GetMainWindow(); + QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr; + child_window = new OpenGLWindow(parent_win_handle, this, QOpenGLContext::globalShareContext()); + child_window->create(); + child_widget = createWindowContainer(child_window, this); + child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); + layout()->addWidget(child_widget); + + core_context = CreateSharedContext(); + return true; } @@ -559,7 +604,14 @@ bool GRenderWindow::InitializeVulkan() { return false; } - child = new GVKWidgetInternal(this, vk_instance.get()); + GMainWindow* parent = GetMainWindow(); + QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr; + child_window = new VulkanWindow(parent_win_handle, this, vk_instance.get()); + child_window->create(); + child_widget = createWindowContainer(child_window, this); + child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); + layout()->addWidget(child_widget); + return true; #else QMessageBox::critical(this, tr("Vulkan not available!"), @@ -569,7 +621,7 @@ bool GRenderWindow::InitializeVulkan() { } bool GRenderWindow::LoadOpenGL() { - Core::Frontend::ScopeAcquireWindowContext acquire_context{*this}; + Core::Frontend::ScopeAcquireContext acquire_context{*this}; if (!gladLoadGL()) { QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), tr("Your GPU may not support OpenGL 4.3, or you do not have the " @@ -621,12 +673,10 @@ QStringList GRenderWindow::GetUnsupportedGLExtensions() const { void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { this->emu_thread = emu_thread; - child->DisablePainting(); } void GRenderWindow::OnEmulationStopping() { emu_thread = nullptr; - child->EnablePainting(); } void GRenderWindow::showEvent(QShowEvent* event) { diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 71a2fa321..79b030304 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -11,11 +11,13 @@ #include <QImage> #include <QThread> #include <QWidget> +#include <QWindow> #include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" +class GRenderWindow; class QKeyEvent; class QScreen; class QTouchEvent; @@ -26,14 +28,6 @@ class QOpenGLContext; class QVulkanInstance; #endif -class GWidgetInternal; -class GGLWidgetInternal; -class GVKWidgetInternal; -class GMainWindow; -class GRenderWindow; -class QSurface; -class QOpenGLContext; - namespace VideoCore { enum class LoadCallbackStage; } @@ -42,7 +36,7 @@ class EmuThread final : public QThread { Q_OBJECT public: - explicit EmuThread(GRenderWindow* render_window); + explicit EmuThread(GRenderWindow& window); ~EmuThread() override; /** @@ -96,7 +90,11 @@ private: std::mutex running_mutex; std::condition_variable running_cv; - GRenderWindow* render_window; + /// Only used in asynchronous GPU mode + std::unique_ptr<Core::Frontend::GraphicsContext> shared_context; + + /// This is shared_context in asynchronous GPU mode, core_context in synchronous GPU mode + Core::Frontend::GraphicsContext& context; signals: /** @@ -126,11 +124,10 @@ class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow { Q_OBJECT public: - GRenderWindow(GMainWindow* parent, EmuThread* emu_thread); + GRenderWindow(QWidget* parent, EmuThread* emu_thread); ~GRenderWindow() override; - // EmuWindow implementation - void SwapBuffers() override; + // EmuWindow implementation. void MakeCurrent() override; void DoneCurrent() override; void PollEvents() override; @@ -139,30 +136,36 @@ public: void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; - void ForwardKeyPressEvent(QKeyEvent* event); - void ForwardKeyReleaseEvent(QKeyEvent* event); - void BackupGeometry(); void RestoreGeometry(); void restoreGeometry(const QByteArray& geometry); // overridden QByteArray saveGeometry(); // overridden - qreal GetWindowPixelRatio() const; - std::pair<u32, u32> ScaleTouch(QPointF pos) const; + qreal windowPixelRatio() const; void closeEvent(QCloseEvent* event) override; + + void resizeEvent(QResizeEvent* event) override; + + void keyPressEvent(QKeyEvent* event) override; + void keyReleaseEvent(QKeyEvent* event) override; + + void mousePressEvent(QMouseEvent* event) override; + void mouseMoveEvent(QMouseEvent* event) override; + void mouseReleaseEvent(QMouseEvent* event) override; + bool event(QEvent* event) override; - void focusOutEvent(QFocusEvent* event) override; - void OnClientAreaResized(u32 width, u32 height); + void focusOutEvent(QFocusEvent* event) override; bool InitRenderTarget(); + /// Destroy the previous run's child_widget which should also destroy the child_window + void ReleaseRenderTarget(); + void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); public slots: - void moveContext(); // overridden - void OnEmulationStarting(EmuThread* emu_thread); void OnEmulationStopping(); void OnFramebufferSizeChanged(); @@ -173,6 +176,7 @@ signals: void FirstFrameDisplayed(); private: + std::pair<u32, u32> ScaleTouch(QPointF pos) const; void TouchBeginEvent(const QTouchEvent* event); void TouchUpdateEvent(const QTouchEvent* event); void TouchEndEvent(); @@ -184,15 +188,9 @@ private: bool LoadOpenGL(); QStringList GetUnsupportedGLExtensions() const; - QWidget* container = nullptr; - GWidgetInternal* child = nullptr; - EmuThread* emu_thread; - // Context that backs the GGLWidgetInternal (and will be used by core to render) - std::unique_ptr<QOpenGLContext> context; - // Context that will be shared between all newly created contexts. This should never be made - // current - std::unique_ptr<QOpenGLContext> shared_context; + + std::unique_ptr<GraphicsContext> core_context; #ifdef HAS_VULKAN std::unique_ptr<QVulkanInstance> vk_instance; @@ -202,6 +200,15 @@ private: QImage screenshot_image; QByteArray geometry; + + /// Native window handle that backs this presentation widget + QWindow* child_window = nullptr; + + /// In order to embed the window into GRenderWindow, you need to use createWindowContainer to + /// put the child_window into a widget then add it to the layout. This child_widget can be + /// parented to GRenderWindow and use Qt's lifetime system + QWidget* child_widget = nullptr; + bool first_frame = false; protected: diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 6209fff75..d0f574147 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -640,6 +640,7 @@ void Config::ReadRendererValues() { ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); Settings::values.use_asynchronous_gpu_emulation = ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); + Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); Settings::values.force_30fps_mode = ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); @@ -1074,6 +1075,7 @@ void Config::SaveRendererValues() { Settings::values.use_accurate_gpu_emulation, false); WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), Settings::values.use_asynchronous_gpu_emulation, false); + WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); // Cast to double because Qt's written float values are not human-readable diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea899c080..fe64c7d81 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -103,6 +103,8 @@ void ConfigureGraphics::SetConfiguration() { ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); + ui->use_vsync->setEnabled(runtime_lock); + ui->use_vsync->setChecked(Settings::values.use_vsync); ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, @@ -120,6 +122,7 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = ui->use_asynchronous_gpu_emulation->isChecked(); + Settings::values.use_vsync = ui->use_vsync->isChecked(); Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); Settings::values.bg_red = static_cast<float>(bg_color.redF()); Settings::values.bg_green = static_cast<float>(bg_color.greenF()); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index db60426ab..9acc7dd93 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -85,6 +85,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_vsync"> + <property name="toolTip"> + <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> + </property> + <property name="text"> + <string>Use VSync (OpenGL only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_accurate_gpu_emulation"> <property name="text"> <string>Use accurate GPU emulation (slow)</string> diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 3f1a94627..c1ea25fb8 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -116,7 +116,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons constexpr std::size_t BaseRegister = 29; auto& memory = Core::System::GetInstance().Memory(); - u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; + u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister]; while (base_pointer != 0) { const u64 lr = memory.Read64(base_pointer + sizeof(u64)); @@ -240,7 +240,7 @@ QString WaitTreeThread::GetText() const { break; } - const auto& context = thread.GetContext(); + const auto& context = thread.GetContext64(); const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") .arg(context.pc, 8, 16, QLatin1Char{'0'}) .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 54ca2dc1d..47615adfe 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -20,7 +20,6 @@ #include "core/file_sys/vfs.h" #include "core/file_sys/vfs_real.h" #include "core/frontend/applets/general_frontend.h" -#include "core/frontend/scope_acquire_window_context.h" #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/am/applet_ae.h" #include "core/hle/service/am/applet_oe.h" @@ -985,11 +984,8 @@ void GMainWindow::BootGame(const QString& filename) { return; // Create and start the emulation thread - emu_thread = std::make_unique<EmuThread>(render_window); + emu_thread = std::make_unique<EmuThread>(*render_window); emit EmulationStarting(emu_thread.get()); - if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { - render_window->moveContext(); - } emu_thread->start(); connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); @@ -1087,6 +1083,9 @@ void GMainWindow::ShutdownGame() { emulation_running = false; game_path.clear(); + + // When closing the game, destroy the GLWindow to clear the context after the game is closed + render_window->ReleaseRenderTarget(); } void GMainWindow::StoreRecentFile(const QString& filename) { @@ -2215,48 +2214,47 @@ void GMainWindow::closeEvent(QCloseEvent* event) { QWidget::closeEvent(event); } -void GMainWindow::keyPressEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyPressEvent(event); - } +static bool IsSingleFileDropEvent(const QMimeData* mime) { + return mime->hasUrls() && mime->urls().length() == 1; } -void GMainWindow::keyReleaseEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyReleaseEvent(event); +void GMainWindow::AcceptDropEvent(QDropEvent* event) { + if (IsSingleFileDropEvent(event->mimeData())) { + event->setDropAction(Qt::DropAction::LinkAction); + event->accept(); } } -static bool IsSingleFileDropEvent(QDropEvent* event) { - const QMimeData* mimeData = event->mimeData(); - return mimeData->hasUrls() && mimeData->urls().length() == 1; -} - -void GMainWindow::dropEvent(QDropEvent* event) { - if (!IsSingleFileDropEvent(event)) { - return; +bool GMainWindow::DropAction(QDropEvent* event) { + if (!IsSingleFileDropEvent(event->mimeData())) { + return false; } const QMimeData* mime_data = event->mimeData(); - const QString filename = mime_data->urls().at(0).toLocalFile(); + const QString& filename = mime_data->urls().at(0).toLocalFile(); if (emulation_running && QFileInfo(filename).suffix() == QStringLiteral("bin")) { + // Amiibo LoadAmiibo(filename); } else { + // Game if (ConfirmChangeGame()) { BootGame(filename); } } + return true; +} + +void GMainWindow::dropEvent(QDropEvent* event) { + DropAction(event); } void GMainWindow::dragEnterEvent(QDragEnterEvent* event) { - if (IsSingleFileDropEvent(event)) { - event->acceptProposedAction(); - } + AcceptDropEvent(event); } void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { - event->acceptProposedAction(); + AcceptDropEvent(event); } bool GMainWindow::ConfirmChangeGame() { @@ -2377,6 +2375,7 @@ int main(int argc, char* argv[]) { // Enables the core to make the qt created contexts current on std::threads QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); + QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts); QApplication app(argc, argv); // Qt changes the locale and causes issues in float conversion using std::to_string() when diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 8eba2172c..a67125567 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -78,6 +78,9 @@ public: std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc; + bool DropAction(QDropEvent* event); + void AcceptDropEvent(QDropEvent* event); + signals: /** @@ -264,8 +267,4 @@ protected: void dropEvent(QDropEvent* event) override; void dragEnterEvent(QDragEnterEvent* event) override; void dragMoveEvent(QDragMoveEvent* event) override; - - // Overrides used to forward signals to the render window when the focus moves out. - void keyPressEvent(QKeyEvent* event) override; - void keyReleaseEvent(QKeyEvent* event) override; }; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 96f1ce3af..b77c12baf 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -390,6 +390,8 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); Settings::values.use_asynchronous_gpu_emulation = sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); + Settings::values.use_vsync = + static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); Settings::values.bg_green = diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 8a2b658cd..085ffbc81 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -84,7 +84,7 @@ touch_device= # from any cemuhook compatible motion program. # IPv4 address of the udp input server (Default "127.0.0.1") -udp_input_address= +udp_input_address=127.0.0.1 # Port of the udp input server. (Default 26760) udp_input_port= @@ -150,6 +150,11 @@ use_accurate_gpu_emulation = # 0 : Off (slow), 1 (default): On (fast) use_asynchronous_gpu_emulation = +# Forces VSync on the display thread. Usually doesn't impact performance, but on some drivers it can +# so only turn this off if you notice a speed difference. +# 0: Off, 1 (default): On +use_vsync = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. bg_red = diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index e96139885..19584360c 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -13,7 +13,7 @@ #include "input_common/sdl/sdl.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" -EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { +EmuWindow_SDL2::EmuWindow_SDL2(Core::System& system, bool fullscreen) : system{system} { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); exit(1); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h index b38f56661..fffac4252 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h @@ -10,9 +10,13 @@ struct SDL_Window; +namespace Core { +class System; +} + class EmuWindow_SDL2 : public Core::Frontend::EmuWindow { public: - explicit EmuWindow_SDL2(bool fullscreen); + explicit EmuWindow_SDL2(Core::System& system, bool fullscreen); ~EmuWindow_SDL2(); /// Polls window events @@ -24,6 +28,9 @@ public: /// Returns if window is shown (not minimized) bool IsShown() const override; + /// Presents the next frame + virtual void Present() = 0; + protected: /// Called by PollEvents when a key is pressed or released. void OnKeyEvent(int key, u8 state); @@ -55,6 +62,9 @@ protected: /// Called when a configuration change affects the minimal size of the window void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) override; + /// Instance of the system, used to access renderer for the presentation thread + Core::System& system; + /// Is the window still open? bool is_open = true; @@ -62,7 +72,7 @@ protected: bool is_shown = true; /// Internal SDL2 render window - SDL_Window* render_window; + SDL_Window* render_window{}; /// Keeps track of how often to update the title bar during gameplay u32 last_time = 0; diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 7ffa0ac09..c0d373477 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -13,24 +13,25 @@ #include "common/logging/log.h" #include "common/scm_rev.h" #include "common/string_util.h" +#include "core/core.h" #include "core/settings.h" #include "input_common/keyboard.h" #include "input_common/main.h" #include "input_common/motion_emu.h" +#include "video_core/renderer_base.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" class SDLGLContext : public Core::Frontend::GraphicsContext { public: explicit SDLGLContext() { // create a hidden window to make the shared context against - window = SDL_CreateWindow("", SDL_WINDOWPOS_UNDEFINED, // x position - SDL_WINDOWPOS_UNDEFINED, // y position - Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, - SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN); + window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, + SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); context = SDL_GL_CreateContext(window); } ~SDLGLContext() { + DoneCurrent(); SDL_GL_DeleteContext(context); SDL_DestroyWindow(window); } @@ -43,8 +44,6 @@ public: SDL_GL_MakeCurrent(window, nullptr); } - void SwapBuffers() override {} - private: SDL_Window* window; SDL_GLContext context; @@ -80,7 +79,8 @@ bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { return unsupported_ext.empty(); } -EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscreen) { +EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) + : EmuWindow_SDL2{system, fullscreen} { SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY); @@ -90,6 +90,7 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); + SDL_GL_SetSwapInterval(0); std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc); @@ -105,13 +106,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree exit(1); } + dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, + SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); + if (fullscreen) { Fullscreen(); } - gl_context = SDL_GL_CreateContext(render_window); - if (gl_context == nullptr) { - LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError()); + window_context = SDL_GL_CreateContext(render_window); + core_context = CreateSharedContext(); + + if (window_context == nullptr) { + LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context: {}", SDL_GetError()); + exit(1); + } + if (core_context == nullptr) { + LOG_CRITICAL(Frontend, "Failed to create shared SDL2 GL context: {}", SDL_GetError()); exit(1); } @@ -128,28 +138,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree OnResize(); OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); SDL_PumpEvents(); - SDL_GL_SetSwapInterval(false); LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc); Settings::LogSettings(); - - DoneCurrent(); } EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { - SDL_GL_DeleteContext(gl_context); -} - -void EmuWindow_SDL2_GL::SwapBuffers() { - SDL_GL_SwapWindow(render_window); + core_context.reset(); + SDL_GL_DeleteContext(window_context); } void EmuWindow_SDL2_GL::MakeCurrent() { - SDL_GL_MakeCurrent(render_window, gl_context); + core_context->MakeCurrent(); } void EmuWindow_SDL2_GL::DoneCurrent() { - SDL_GL_MakeCurrent(render_window, nullptr); + core_context->DoneCurrent(); } void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, @@ -161,3 +165,13 @@ void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, voi std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { return std::make_unique<SDLGLContext>(); } + +void EmuWindow_SDL2_GL::Present() { + SDL_GL_MakeCurrent(render_window, window_context); + SDL_GL_SetSwapInterval(Settings::values.use_vsync ? 1 : 0); + while (IsOpen()) { + system.Renderer().TryPresent(100); + SDL_GL_SwapWindow(render_window); + } + SDL_GL_MakeCurrent(render_window, nullptr); +} diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index c753085a8..b80669ff0 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h @@ -10,17 +10,12 @@ class EmuWindow_SDL2_GL final : public EmuWindow_SDL2 { public: - explicit EmuWindow_SDL2_GL(bool fullscreen); + explicit EmuWindow_SDL2_GL(Core::System& system, bool fullscreen); ~EmuWindow_SDL2_GL(); - /// Swap buffers to display the next frame - void SwapBuffers() override; - - /// Makes the graphics context current for the caller thread void MakeCurrent() override; - - /// Releases the GL context from the caller thread void DoneCurrent() override; + void Present() override; /// Ignored in OpenGL void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, @@ -29,10 +24,17 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: + /// Fake hidden window for the core context + SDL_Window* dummy_window{}; + /// Whether the GPU and driver supports the OpenGL extension required bool SupportsRequiredGLExtensions(); using SDL_GLContext = void*; + /// The OpenGL context associated with the window - SDL_GLContext gl_context; + SDL_GLContext window_context; + + /// The OpenGL context associated with the core + std::unique_ptr<Core::Frontend::GraphicsContext> core_context; }; diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index a203f0da9..abcc58165 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -15,7 +15,8 @@ #include "core/settings.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" -EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(bool fullscreen) : EmuWindow_SDL2(fullscreen) { +EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) + : EmuWindow_SDL2{system, fullscreen} { if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); exit(EXIT_FAILURE); @@ -110,8 +111,6 @@ EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { vkDestroyInstance(vk_instance, nullptr); } -void EmuWindow_SDL2_VK::SwapBuffers() {} - void EmuWindow_SDL2_VK::MakeCurrent() { // Unused on Vulkan } @@ -160,3 +159,7 @@ bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanc return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); }) != layers.end(); } + +void EmuWindow_SDL2_VK::Present() { + // TODO (bunnei): ImplementMe +} diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index 2a7c06a24..1eb8c0868 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h @@ -10,19 +10,12 @@ class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { public: - explicit EmuWindow_SDL2_VK(bool fullscreen); + explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); ~EmuWindow_SDL2_VK(); - /// Swap buffers to display the next frame - void SwapBuffers() override; - - /// Makes the graphics context current for the caller thread void MakeCurrent() override; - - /// Releases the GL context from the caller thread void DoneCurrent() override; - - /// Retrieves Vulkan specific handlers from the window + void Present() override; void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, void* surface) const override; diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 325795321..babf4c3a4 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -177,14 +177,16 @@ int main(int argc, char** argv) { Settings::values.use_gdbstub = use_gdbstub; Settings::Apply(); + Core::System& system{Core::System::GetInstance()}; + std::unique_ptr<EmuWindow_SDL2> emu_window; switch (Settings::values.renderer_backend) { case Settings::RendererBackend::OpenGL: - emu_window = std::make_unique<EmuWindow_SDL2_GL>(fullscreen); + emu_window = std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen); break; case Settings::RendererBackend::Vulkan: #ifdef HAS_VULKAN - emu_window = std::make_unique<EmuWindow_SDL2_VK>(fullscreen); + emu_window = std::make_unique<EmuWindow_SDL2_VK>(system, fullscreen); break; #else LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); @@ -192,12 +194,6 @@ int main(int argc, char** argv) { #endif } - if (!Settings::values.use_multi_core) { - // Single core mode must acquire OpenGL context for entire emulation session - emu_window->MakeCurrent(); - } - - Core::System& system{Core::System::GetInstance()}; system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>()); system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>()); system.GetFileSystemController().CreateFactories(*system.GetFilesystem()); @@ -234,12 +230,23 @@ int main(int argc, char** argv) { system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); - emu_window->MakeCurrent(); system.Renderer().Rasterizer().LoadDiskResources(); + // Acquire render context for duration of the thread if this is the rendering thread + if (!Settings::values.use_asynchronous_gpu_emulation) { + emu_window->MakeCurrent(); + } + SCOPE_EXIT({ + if (!Settings::values.use_asynchronous_gpu_emulation) { + emu_window->DoneCurrent(); + } + }); + + std::thread render_thread([&emu_window] { emu_window->Present(); }); while (emu_window->IsOpen()) { system.RunLoop(); } + render_thread.join(); system.Shutdown(); diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index f2cc4a797..a1bdb1a12 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp @@ -112,10 +112,6 @@ EmuWindow_SDL2_Hide::~EmuWindow_SDL2_Hide() { SDL_Quit(); } -void EmuWindow_SDL2_Hide::SwapBuffers() { - SDL_GL_SwapWindow(render_window); -} - void EmuWindow_SDL2_Hide::PollEvents() {} void EmuWindow_SDL2_Hide::MakeCurrent() { diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index c7fccc002..b13e15309 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h @@ -13,9 +13,6 @@ public: explicit EmuWindow_SDL2_Hide(); ~EmuWindow_SDL2_Hide(); - /// Swap buffers to display the next frame - void SwapBuffers() override; - /// Polls window events void PollEvents() override; |