diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/x64/xbyak_abi.h | 95 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_32.cpp | 21 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_32.h | 5 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_cp15.cpp | 81 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_cp15.h | 126 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 20 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 82 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 32 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6 |
10 files changed, 191 insertions, 289 deletions
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@ namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); - return Xbyak::Reg64(reg_index); + return Xbyak::Reg64(static_cast<int>(reg_index)); } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); - return Xbyak::Xmm(reg_index - 16); + return Xbyak::Xmm(static_cast<int>(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; #endif -inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, - size_t needed_frame_size, s32* out_subtraction, - s32* out_xmm_offset) { +struct ABIFrameInfo { + s32 subtraction; + s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size) { const auto count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; size_t subtraction = 0; @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, rsp_alignment -= subtraction; subtraction += rsp_alignment & 0xF; - *out_subtraction = (s32)subtraction; - *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); + return ABIFrameInfo{static_cast<s32>(subtraction), + static_cast<s32>(subtraction - xmm_base_subtraction)}; } inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); + for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); + code.push(IndexToReg64(i)); } } - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - for (int i = 0; i < regs.count(); i++) { - if (regs.test(i) & ABI_ALL_GPRS.test(i)) { - code.push(IndexToReg64(i)); - } + if (frame_info.subtraction != 0) { + code.sub(code.rsp, frame_info.subtraction); } for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); - xmm_offset += 0x10; + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; } } @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); - xmm_offset += 0x10; + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; } } - if (subtraction != 0) { - code.add(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.add(code.rsp, frame_info.subtraction); } // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { - code.pop(IndexToReg64(i)); - } - } -} - -inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, - size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - for (std::size_t i = 0; i < regs.size(); ++i) { + for (std::size_t j = 0; j < regs.size(); ++j) { + const std::size_t i = regs.size() - j - 1; if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); - } - } - - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - - return ABI_SHADOW_SPACE; -} - -inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - if (subtraction != 0) { - code.add(code.rsp, subtraction); - } - - // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { code.pop(IndexToReg64(i)); } } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 9bc86e3b9..19d798dc7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -50,7 +50,8 @@ public: } void InterpreterFallback(u32 pc, std::size_t num_instructions) override { - UNIMPLEMENTED(); + UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc, + MemoryReadCode(pc)); } void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { @@ -89,8 +90,6 @@ public: ARM_Dynarmic_32& parent; std::size_t num_interpreted_instructions{}; - u64 tpidrro_el0{}; - u64 tpidr_el0{}; }; std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& config.callbacks = cb.get(); // TODO(bunnei): Implement page table for 32-bit // config.page_table = &page_table.pointers; - config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); + config.coprocessors[15] = cp15; config.define_unpredictable_behaviour = true; return std::make_unique<Dynarmic::A32::Jit>(config); } @@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() { } void ARM_Dynarmic_32::Step() { - cb->InterpreterFallback(jit->Regs()[15], 1); + jit->Step(); } ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ARM_Interface{system}, - cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, + : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)), + cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index}, exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; @@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { } u64 ARM_Dynarmic_32::GetTlsAddress() const { - return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; + return cp15->uro; } void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { - CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); + cp15->uro = static_cast<u32>(address); } u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { - return cb->tpidr_el0; + return cp15->uprw; } void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { - cb->tpidr_el0 = value; + cp15->uprw = static_cast<u32>(value); } void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 8ba9cea8f..e5b92d7bb 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -22,6 +22,7 @@ class Memory; namespace Core { class DynarmicCallbacks32; +class DynarmicCP15; class DynarmicExclusiveMonitor; class System; @@ -66,12 +67,14 @@ private: std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; friend class DynarmicCallbacks32; + friend class DynarmicCP15; + std::unique_ptr<DynarmicCallbacks32> cb; JitCacheType jit_cache; std::shared_ptr<Dynarmic::A32::Jit> jit; + std::shared_ptr<DynarmicCP15> cp15; std::size_t core_index; DynarmicExclusiveMonitor& exclusive_monitor; - std::array<u32, 84> CP15_regs{}; }; } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp index 3fdcdebde..d43e4dd70 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -2,79 +2,132 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <fmt/format.h> +#include "common/logging/log.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" using Callback = Dynarmic::A32::Coprocessor::Callback; using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; +template <> +struct fmt::formatter<Dynarmic::A32::CoprocReg> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) { + return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg)); + } +}; + +namespace Core { + +static u32 dummy_value; + std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { + LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn, + CRm, opc2); return {}; } CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { - // TODO(merry): Privileged CP15 registers - if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { + // CP15_FLUSH_PREFETCH_BUFFER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; + return &dummy_value; } if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { switch (opc2) { case 4: + // CP15_DATA_SYNC_BARRIER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; + return &dummy_value; case 5: + // CP15_DATA_MEMORY_BARRIER // This is a dummy write, we ignore the value written here. - return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; - default: - return {}; + return &dummy_value; } } if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + // CP15_THREAD_UPRW + return &uprw; } + LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, + opc2); return {}; } CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { + LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); return {}; } CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm, unsigned opc2) { - // TODO(merry): Privileged CP15 registers - if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { switch (opc2) { case 2: - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; + // CP15_THREAD_UPRW + return &uprw; case 3: - return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; - default: - return {}; + // CP15_THREAD_URO + return &uro; } } + LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, + opc2); return {}; } CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { + if (!two && opc == 0 && CRm == CoprocReg::C14) { + // CNTPCT + const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>( + [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 { + ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg; + return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); + }); + return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent}; + } + + LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); return {}; } std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) { + if (option) { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", + long_transfer ? "l" : "", CRd, *option); + } else { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", + long_transfer ? "l" : "", CRd); + } return {}; } std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) { + if (option) { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", + long_transfer ? "l" : "", CRd, *option); + } else { + LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", + long_transfer ? "l" : "", CRd); + } return {}; } + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h index 07bcde5f9..7356d252e 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -10,128 +10,15 @@ #include <dynarmic/A32/coprocessor.h> #include "common/common_types.h" -enum class CP15Register { - // c0 - Information registers - CP15_MAIN_ID, - CP15_CACHE_TYPE, - CP15_TCM_STATUS, - CP15_TLB_TYPE, - CP15_CPU_ID, - CP15_PROCESSOR_FEATURE_0, - CP15_PROCESSOR_FEATURE_1, - CP15_DEBUG_FEATURE_0, - CP15_AUXILIARY_FEATURE_0, - CP15_MEMORY_MODEL_FEATURE_0, - CP15_MEMORY_MODEL_FEATURE_1, - CP15_MEMORY_MODEL_FEATURE_2, - CP15_MEMORY_MODEL_FEATURE_3, - CP15_ISA_FEATURE_0, - CP15_ISA_FEATURE_1, - CP15_ISA_FEATURE_2, - CP15_ISA_FEATURE_3, - CP15_ISA_FEATURE_4, +namespace Core { - // c1 - Control registers - CP15_CONTROL, - CP15_AUXILIARY_CONTROL, - CP15_COPROCESSOR_ACCESS_CONTROL, - - // c2 - Translation table registers - CP15_TRANSLATION_BASE_TABLE_0, - CP15_TRANSLATION_BASE_TABLE_1, - CP15_TRANSLATION_BASE_CONTROL, - CP15_DOMAIN_ACCESS_CONTROL, - CP15_RESERVED, - - // c5 - Fault status registers - CP15_FAULT_STATUS, - CP15_INSTR_FAULT_STATUS, - CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, - CP15_INST_FSR, - - // c6 - Fault Address registers - CP15_FAULT_ADDRESS, - CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, - CP15_WFAR, - CP15_IFAR, - - // c7 - Cache operation registers - CP15_WAIT_FOR_INTERRUPT, - CP15_PHYS_ADDRESS, - CP15_INVALIDATE_INSTR_CACHE, - CP15_INVALIDATE_INSTR_CACHE_USING_MVA, - CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, - CP15_FLUSH_PREFETCH_BUFFER, - CP15_FLUSH_BRANCH_TARGET_CACHE, - CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, - CP15_INVALIDATE_DATA_CACHE, - CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, - CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, - CP15_INVALIDATE_DATA_AND_INSTR_CACHE, - CP15_CLEAN_DATA_CACHE, - CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, - CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, - CP15_DATA_SYNC_BARRIER, - CP15_DATA_MEMORY_BARRIER, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, - CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, - - // c8 - TLB operations - CP15_INVALIDATE_ITLB, - CP15_INVALIDATE_ITLB_SINGLE_ENTRY, - CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, - CP15_INVALIDATE_DTLB, - CP15_INVALIDATE_DTLB_SINGLE_ENTRY, - CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, - CP15_INVALIDATE_UTLB, - CP15_INVALIDATE_UTLB_SINGLE_ENTRY, - CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, - CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, - - // c9 - Data cache lockdown register - CP15_DATA_CACHE_LOCKDOWN, - - // c10 - TLB/Memory map registers - CP15_TLB_LOCKDOWN, - CP15_PRIMARY_REGION_REMAP, - CP15_NORMAL_REGION_REMAP, - - // c13 - Thread related registers - CP15_PID, - CP15_CONTEXT_ID, - CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write - CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) - CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. - - // c15 - Performance and TLB lockdown registers - CP15_PERFORMANCE_MONITOR_CONTROL, - CP15_CYCLE_COUNTER, - CP15_COUNT_0, - CP15_COUNT_1, - CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, - CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, - CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, - CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, - CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, - CP15_TLB_DEBUG_CONTROL, - - // Skyeye defined - CP15_TLB_FAULT_ADDR, - CP15_TLB_FAULT_STATUS, - - // Not an actual register. - // All registers should be defined above this. - CP15_REGISTER_COUNT, -}; +class ARM_Dynarmic_32; class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { public: using CoprocReg = Dynarmic::A32::CoprocReg; - explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; + explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {} std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, CoprocReg CRn, CoprocReg CRm, @@ -147,6 +34,9 @@ public: std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, std::optional<u8> option) override; -private: - u32* CP15{}; + ARM_Dynarmic_32& parent; + u32 uprw; + u32 uro; }; + +} // namespace Core diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 30a7e1fe9..bee34a7c0 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -298,22 +298,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { sub(result, opcode.immediate * -1); } } - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, RESULT); Common::X64::CallFarFunction(*this, &Read); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(RESULT, Common::X64::ABI_RETURN.cvt32()); Compile_ProcessResult(opcode.result_operation, opcode.dst); } void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); mov(Common::X64::ABI_PARAM3, value); Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Xbyak::Label dont_process{}; // Get increment @@ -417,7 +417,7 @@ void MacroJITx64Impl::Compile() { bool keep_executing = true; labels.fill(Xbyak::Label()); - Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); // JIT state mov(STATE, Common::X64::ABI_PARAM1); mov(PARAMETERS, Common::X64::ABI_PARAM2); @@ -455,7 +455,7 @@ void MacroJITx64Impl::Compile() { L(end_of_code); - Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); ret(); ready(); program = getCode<ProgramType>(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index e245e27ec..b31d604e4 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -123,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); u32 base_images = 0; - // Reserve more image bindings on fragment and vertex stages. + // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. + // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the + // fragment stage, and at least 1 for the rest of the stages. + // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. + + // Reserve at least 4 image bindings on the fragment stage. bindings[4].image = - Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); - bindings[0].image = - Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + + // This is guaranteed to be at least 1. + const u32 total_extracted_images = num_images / (NumStages - 1); // Reserve the other image bindings. - const u32 total_extracted_images = num_images / (NumStages - 2); - for (std::size_t i = 2; i < NumStages; ++i) { + for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; + if (stage == 4) { + continue; + } bindings[stage].image = Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); } diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 994ae98eb..35e329240 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } + break; case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: switch (attrib.size) { @@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } + break; case Maxwell::VertexAttribute::Type::Float: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_16: @@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32: case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } + break; case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_16_16_16: case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } + break; case Maxwell::VertexAttribute::Type::SignedScaled: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_16_16_16: case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_SHORT; - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - return {}; } - default: - LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - return {}; + break; } + UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), + attrib.SizeString()); + return {}; } inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { @@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { case Maxwell::IndexFormat::UnsignedInt: return GL_UNSIGNED_INT; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); - UNREACHABLE(); + UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format)); return {}; } @@ -180,33 +169,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { } inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, - Tegra::Texture::TextureMipmapFilter mip_filter_mode) { + Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) { switch (filter_mode) { - case Tegra::Texture::TextureFilter::Linear: { - switch (mip_filter_mode) { + case Tegra::Texture::TextureFilter::Nearest: + switch (mipmap_filter_mode) { case Tegra::Texture::TextureMipmapFilter::None: - return GL_LINEAR; + return GL_NEAREST; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_LINEAR_MIPMAP_NEAREST; + return GL_NEAREST_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_LINEAR_MIPMAP_LINEAR; + return GL_NEAREST_MIPMAP_LINEAR; } break; - } - case Tegra::Texture::TextureFilter::Nearest: { - switch (mip_filter_mode) { + case Tegra::Texture::TextureFilter::Linear: + switch (mipmap_filter_mode) { case Tegra::Texture::TextureMipmapFilter::None: - return GL_NEAREST; + return GL_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_NEAREST_MIPMAP_NEAREST; + return GL_LINEAR_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_NEAREST_MIPMAP_LINEAR; + return GL_LINEAR_MIPMAP_LINEAR; } break; } - } - LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); - return GL_LINEAR; + UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", + static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode)); + return GL_NEAREST; } inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { @@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } - default: - LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); - return GL_REPEAT; } + UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); + return GL_REPEAT; } inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { @@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { case Tegra::Texture::DepthCompareFunc::Always: return GL_ALWAYS; } - LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", - static_cast<u32>(func)); + UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func)); return GL_GREATER; } @@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return GL_MAX; } - LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); return GL_FUNC_ADD; } @@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } - LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); return GL_ZERO; } @@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source)); return GL_ZERO; } @@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return GL_ALWAYS; } - LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); return GL_ALWAYS; } @@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { case Maxwell::StencilOp::DecrWrapOGL: return GL_DECR_WRAP; } - LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil)); return GL_KEEP; } @@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } - LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face)); return GL_CCW; } @@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } - LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); return GL_BACK; } @@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { case Maxwell::LogicOperation::Set: return GL_SET; } - LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation)); return GL_COPY; } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 62e950d31..1f2b6734b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -21,29 +21,29 @@ namespace Sampler { VkFilter Filter(Tegra::Texture::TextureFilter filter) { switch (filter) { - case Tegra::Texture::TextureFilter::Linear: - return VK_FILTER_LINEAR; case Tegra::Texture::TextureFilter::Nearest: return VK_FILTER_NEAREST; + case Tegra::Texture::TextureFilter::Linear: + return VK_FILTER_LINEAR; } - UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); + UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter)); return {}; } VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { switch (mipmap_filter) { case Tegra::Texture::TextureMipmapFilter::None: - // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping - // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to - // use an image view with a single mipmap level to emulate this. - return VK_SAMPLER_MIPMAP_MODE_LINEAR; - ; - case Tegra::Texture::TextureMipmapFilter::Linear: - return VK_SAMPLER_MIPMAP_MODE_LINEAR; + // There are no Vulkan filter modes that directly correspond to OpenGL minification filters + // of GL_LINEAR or GL_NEAREST, but they can be emulated using + // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter = + // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively. + return VK_SAMPLER_MIPMAP_MODE_NEAREST; case Tegra::Texture::TextureMipmapFilter::Nearest: return VK_SAMPLER_MIPMAP_MODE_NEAREST; + case Tegra::Texture::TextureMipmapFilter::Linear: + return VK_SAMPLER_MIPMAP_MODE_LINEAR; } - UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); + UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); return {}; } @@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; - default: - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); - return {}; } + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; } VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { @@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; - default: - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); - return {}; } + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; } VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index e6f2fa553..616eacc36 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -9,6 +9,8 @@ #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/textures/texture.h" +using Tegra::Texture::TextureMipmapFilter; + namespace Vulkan { namespace { @@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c ci.maxAnisotropy = tsc.GetMaxAnisotropy(); ci.compareEnable = tsc.depth_compare_enabled; ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.GetMinLod(); - ci.maxLod = tsc.GetMaxLod(); + ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); + ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); ci.unnormalizedCoordinates = VK_FALSE; return device.GetLogical().CreateSampler(ci); |