diff options
Diffstat (limited to 'src')
446 files changed, 17970 insertions, 13564 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a22b564d6..8777df751 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,7 @@ else() -Werror=implicit-fallthrough -Werror=missing-declarations -Werror=reorder + -Werror=uninitialized -Werror=unused-result -Wextra -Wmissing-declarations diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 179560cd7..d2ce8c814 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -11,7 +11,6 @@ #include "audio_core/info_updater.h" #include "audio_core/voice_context.h" #include "common/logging/log.h" -#include "core/hle/kernel/writable_event.h" #include "core/memory.h" #include "core/settings.h" @@ -71,10 +70,9 @@ namespace { namespace AudioCore { AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, AudioCommon::AudioRendererParameter params, - std::shared_ptr<Kernel::WritableEvent> buffer_event_, + Stream::ReleaseCallback&& release_callback, std::size_t instance_number) - : worker_params{params}, buffer_event{buffer_event_}, - memory_pool_info(params.effect_count + params.voice_count * 4), + : worker_params{params}, memory_pool_info(params.effect_count + params.voice_count * 4), voice_context(params.voice_count), effect_context(params.effect_count), mix_context(), sink_context(params.sink_count), splitter_context(), voices(params.voice_count), memory{memory_}, @@ -85,10 +83,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory params.num_splitter_send_channels); mix_context.Initialize(behavior_info, params.submix_count + 1, params.effect_count); audio_out = std::make_unique<AudioCore::AudioOut>(); - stream = - audio_out->OpenStream(core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, - fmt::format("AudioRenderer-Instance{}", instance_number), - [=]() { buffer_event_->Signal(); }); + stream = audio_out->OpenStream( + core_timing, 
params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, + fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback)); audio_out->StartStream(stream); QueueMixedBuffer(0); diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 90f7eafa4..18567f618 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -27,10 +27,6 @@ namespace Core::Timing { class CoreTiming; } -namespace Kernel { -class WritableEvent; -} - namespace Core::Memory { class Memory; } @@ -44,8 +40,7 @@ class AudioRenderer { public: AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, AudioCommon::AudioRendererParameter params, - std::shared_ptr<Kernel::WritableEvent> buffer_event_, - std::size_t instance_number); + Stream::ReleaseCallback&& release_callback, std::size_t instance_number); ~AudioRenderer(); [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params, @@ -61,7 +56,6 @@ private: BehaviorInfo behavior_info{}; AudioCommon::AudioRendererParameter worker_params; - std::shared_ptr<Kernel::WritableEvent> buffer_event; std::vector<ServerMemoryPoolInfo> memory_pool_info; VoiceContext voice_context; EffectContext effect_context; diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index cf7b186e4..043447eaa 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -30,6 +30,7 @@ public: params.rate = sample_rate; params.channels = num_channels; params.format = CUBEB_SAMPLE_S16NE; + params.prefs = CUBEB_STREAM_PREF_PERSIST; switch (num_channels) { case 1: params.layout = CUBEB_LAYOUT_MONO; diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index eca296589..afe68c9ed 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -130,7 +130,11 @@ bool Stream::ContainsBuffer([[maybe_unused]] Buffer::Tag tag) const { std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t 
max_count) { std::vector<Buffer::Tag> tags; for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) { - tags.push_back(released_buffers.front()->GetTag()); + if (released_buffers.front()) { + tags.push_back(released_buffers.front()->GetTag()); + } else { + ASSERT_MSG(false, "Invalid tag in released_buffers!"); + } released_buffers.pop(); } return tags; @@ -140,7 +144,11 @@ std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers() { std::vector<Buffer::Tag> tags; tags.reserve(released_buffers.size()); while (!released_buffers.empty()) { - tags.push_back(released_buffers.front()->GetTag()); + if (released_buffers.front()) { + tags.push_back(released_buffers.front()->GetTag()); + } else { + ASSERT_MSG(false, "Invalid tag in released_buffers!"); + } released_buffers.pop(); } return tags; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 56c7e21f5..5c8003eb1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -104,6 +104,7 @@ add_library(common STATIC detached_tasks.h bit_cast.h bit_field.h + bit_set.h bit_util.h cityhash.cpp cityhash.h @@ -140,7 +141,6 @@ add_library(common STATIC microprofile.h microprofileui.h misc.cpp - multi_level_queue.h page_table.cpp page_table.h param_package.cpp @@ -162,6 +162,8 @@ add_library(common STATIC thread.cpp thread.h thread_queue_list.h + thread_worker.cpp + thread_worker.h threadsafe_queue.h time_zone.cpp time_zone.h @@ -209,7 +211,6 @@ else() endif() create_target_directory_groups(common) -find_package(Boost 1.71 COMPONENTS context headers REQUIRED) target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) target_link_libraries(common PRIVATE lz4::lz4 xbyak) diff --git a/src/common/bit_set.h b/src/common/bit_set.h new file mode 100644 index 000000000..9235ad412 --- /dev/null +++ b/src/common/bit_set.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018-2020 Atmosphère-NX + * + * This program is free software; you can redistribute it and/or 
modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <array> +#include <bit> + +#include "common/alignment.h" +#include "common/bit_util.h" +#include "common/common_types.h" + +namespace Common { + +namespace impl { + +template <typename Storage, size_t N> +class BitSet { + +public: + constexpr BitSet() = default; + + constexpr void SetBit(size_t i) { + this->words[i / FlagsPerWord] |= GetBitMask(i % FlagsPerWord); + } + + constexpr void ClearBit(size_t i) { + this->words[i / FlagsPerWord] &= ~GetBitMask(i % FlagsPerWord); + } + + constexpr size_t CountLeadingZero() const { + for (size_t i = 0; i < NumWords; i++) { + if (this->words[i]) { + return FlagsPerWord * i + CountLeadingZeroImpl(this->words[i]); + } + } + return FlagsPerWord * NumWords; + } + + constexpr size_t GetNextSet(size_t n) const { + for (size_t i = (n + 1) / FlagsPerWord; i < NumWords; i++) { + Storage word = this->words[i]; + if (!IsAligned(n + 1, FlagsPerWord)) { + word &= GetBitMask(n % FlagsPerWord) - 1; + } + if (word) { + return FlagsPerWord * i + CountLeadingZeroImpl(word); + } + } + return FlagsPerWord * NumWords; + } + +private: + static_assert(std::is_unsigned_v<Storage>); + static_assert(sizeof(Storage) <= sizeof(u64)); + + static constexpr size_t FlagsPerWord = BitSize<Storage>(); + static constexpr size_t NumWords = AlignUp(N, FlagsPerWord) / FlagsPerWord; + + static constexpr auto CountLeadingZeroImpl(Storage word) { + return std::countl_zero(static_cast<unsigned long 
long>(word)) - + (BitSize<unsigned long long>() - FlagsPerWord); + } + + static constexpr Storage GetBitMask(size_t bit) { + return Storage(1) << (FlagsPerWord - 1 - bit); + } + + std::array<Storage, NumWords> words{}; +}; + +} // namespace impl + +template <size_t N> +using BitSet8 = impl::BitSet<u8, N>; + +template <size_t N> +using BitSet16 = impl::BitSet<u16, N>; + +template <size_t N> +using BitSet32 = impl::BitSet<u32, N>; + +template <size_t N> +using BitSet64 = impl::BitSet<u64, N>; + +} // namespace Common diff --git a/src/common/concepts.h b/src/common/concepts.h index 5bef3ad67..aa08065a7 100644 --- a/src/common/concepts.h +++ b/src/common/concepts.h @@ -31,4 +31,8 @@ concept DerivedFrom = requires { std::is_convertible_v<const volatile Derived*, const volatile Base*>; }; +// TODO: Replace with std::convertible_to when libc++ implements it. +template <typename From, typename To> +concept ConvertibleTo = std::is_convertible_v<From, To>; + } // namespace Common diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h deleted file mode 100644 index 4b305bf40..000000000 --- a/src/common/multi_level_queue.h +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2019 TuxSH -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <iterator> -#include <list> -#include <utility> - -#include "common/bit_util.h" -#include "common/common_types.h" - -namespace Common { - -/** - * A MultiLevelQueue is a type of priority queue which has the following characteristics: - * - iteratable through each of its elements. - * - back can be obtained. 
- * - O(1) add, lookup (both front and back) - * - discrete priorities and a max of 64 priorities (limited domain) - * This type of priority queue is normaly used for managing threads within an scheduler - */ -template <typename T, std::size_t Depth> -class MultiLevelQueue { -public: - using value_type = T; - using reference = value_type&; - using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; - - using difference_type = typename std::pointer_traits<pointer>::difference_type; - using size_type = std::size_t; - - template <bool is_constant> - class iterator_impl { - public: - using iterator_category = std::bidirectional_iterator_tag; - using value_type = T; - using pointer = std::conditional_t<is_constant, T*, const T*>; - using reference = std::conditional_t<is_constant, const T&, T&>; - using difference_type = typename std::pointer_traits<pointer>::difference_type; - - friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) { - if (lhs.IsEnd() && rhs.IsEnd()) - return true; - return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it); - } - - friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) { - return !operator==(lhs, rhs); - } - - reference operator*() const { - return *it; - } - - pointer operator->() const { - return it.operator->(); - } - - iterator_impl& operator++() { - if (IsEnd()) { - return *this; - } - - ++it; - - if (it == GetEndItForPrio()) { - u64 prios = mlq.used_priorities; - prios &= ~((1ULL << (current_priority + 1)) - 1); - if (prios == 0) { - current_priority = static_cast<u32>(mlq.depth()); - } else { - current_priority = CountTrailingZeroes64(prios); - it = GetBeginItForPrio(); - } - } - return *this; - } - - iterator_impl& operator--() { - if (IsEnd()) { - if (mlq.used_priorities != 0) { - current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities); - it = GetEndItForPrio(); - --it; - } - } else if 
(it == GetBeginItForPrio()) { - u64 prios = mlq.used_priorities; - prios &= (1ULL << current_priority) - 1; - if (prios != 0) { - current_priority = CountTrailingZeroes64(prios); - it = GetEndItForPrio(); - --it; - } - } else { - --it; - } - return *this; - } - - iterator_impl operator++(int) { - const iterator_impl v{*this}; - ++(*this); - return v; - } - - iterator_impl operator--(int) { - const iterator_impl v{*this}; - --(*this); - return v; - } - - // allow implicit const->non-const - iterator_impl(const iterator_impl<false>& other) - : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {} - - iterator_impl(const iterator_impl<true>& other) - : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {} - - iterator_impl& operator=(const iterator_impl<false>& other) { - mlq = other.mlq; - it = other.it; - current_priority = other.current_priority; - return *this; - } - - friend class iterator_impl<true>; - iterator_impl() = default; - - private: - friend class MultiLevelQueue; - using container_ref = - std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>; - using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator, - typename std::list<T>::iterator>; - - explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority) - : mlq(mlq), it(it), current_priority(current_priority) {} - explicit iterator_impl(container_ref mlq, u32 current_priority) - : mlq(mlq), it(), current_priority(current_priority) {} - - bool IsEnd() const { - return current_priority == mlq.depth(); - } - - list_iterator GetBeginItForPrio() const { - return mlq.levels[current_priority].begin(); - } - - list_iterator GetEndItForPrio() const { - return mlq.levels[current_priority].end(); - } - - container_ref mlq; - list_iterator it; - u32 current_priority; - }; - - using iterator = iterator_impl<false>; - using const_iterator = iterator_impl<true>; - - void add(const T& element, u32 priority, 
bool send_back = true) { - if (send_back) - levels[priority].push_back(element); - else - levels[priority].push_front(element); - used_priorities |= 1ULL << priority; - } - - void remove(const T& element, u32 priority) { - auto it = ListIterateTo(levels[priority], element); - if (it == levels[priority].end()) - return; - levels[priority].erase(it); - if (levels[priority].empty()) { - used_priorities &= ~(1ULL << priority); - } - } - - void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) { - remove(element, old_priority); - add(element, new_priority, !adjust_front); - } - void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) { - adjust(*it, old_priority, new_priority, adjust_front); - } - - void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) { - ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority], - ListIterateTo(levels[priority], element)); - - other.used_priorities |= 1ULL << priority; - - if (levels[priority].empty()) { - used_priorities &= ~(1ULL << priority); - } - } - - void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) { - transfer_to_front(*it, priority, other); - } - - void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) { - ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority], - ListIterateTo(levels[priority], element)); - - other.used_priorities |= 1ULL << priority; - - if (levels[priority].empty()) { - used_priorities &= ~(1ULL << priority); - } - } - - void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) { - transfer_to_back(*it, priority, other); - } - - void yield(u32 priority, std::size_t n = 1) { - ListShiftForward(levels[priority], n); - } - - [[nodiscard]] std::size_t depth() const { - return Depth; - } - - [[nodiscard]] std::size_t size(u32 priority) const { - return levels[priority].size(); - } - - 
[[nodiscard]] std::size_t size() const { - u64 priorities = used_priorities; - std::size_t size = 0; - while (priorities != 0) { - const u64 current_priority = CountTrailingZeroes64(priorities); - size += levels[current_priority].size(); - priorities &= ~(1ULL << current_priority); - } - return size; - } - - [[nodiscard]] bool empty() const { - return used_priorities == 0; - } - - [[nodiscard]] bool empty(u32 priority) const { - return (used_priorities & (1ULL << priority)) == 0; - } - - [[nodiscard]] u32 highest_priority_set(u32 max_priority = 0) const { - const u64 priorities = - max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1)); - return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities)); - } - - [[nodiscard]] u32 lowest_priority_set(u32 min_priority = Depth - 1) const { - const u64 priorities = min_priority >= Depth - 1 - ? used_priorities - : (used_priorities & ((1ULL << (min_priority + 1)) - 1)); - return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities); - } - - [[nodiscard]] const_iterator cbegin(u32 max_prio = 0) const { - const u32 priority = highest_priority_set(max_prio); - return priority == Depth ? cend() - : const_iterator{*this, levels[priority].cbegin(), priority}; - } - [[nodiscard]] const_iterator begin(u32 max_prio = 0) const { - return cbegin(max_prio); - } - [[nodiscard]] iterator begin(u32 max_prio = 0) { - const u32 priority = highest_priority_set(max_prio); - return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority}; - } - - [[nodiscard]] const_iterator cend(u32 min_prio = Depth - 1) const { - return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1); - } - [[nodiscard]] const_iterator end(u32 min_prio = Depth - 1) const { - return cend(min_prio); - } - [[nodiscard]] iterator end(u32 min_prio = Depth - 1) { - return min_prio == Depth - 1 ? 
iterator{*this, Depth} : begin(min_prio + 1); - } - - [[nodiscard]] T& front(u32 max_priority = 0) { - const u32 priority = highest_priority_set(max_priority); - return levels[priority == Depth ? 0 : priority].front(); - } - [[nodiscard]] const T& front(u32 max_priority = 0) const { - const u32 priority = highest_priority_set(max_priority); - return levels[priority == Depth ? 0 : priority].front(); - } - - [[nodiscard]] T& back(u32 min_priority = Depth - 1) { - const u32 priority = lowest_priority_set(min_priority); // intended - return levels[priority == Depth ? 63 : priority].back(); - } - [[nodiscard]] const T& back(u32 min_priority = Depth - 1) const { - const u32 priority = lowest_priority_set(min_priority); // intended - return levels[priority == Depth ? 63 : priority].back(); - } - - void clear() { - used_priorities = 0; - for (std::size_t i = 0; i < Depth; i++) { - levels[i].clear(); - } - } - -private: - using const_list_iterator = typename std::list<T>::const_iterator; - - static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) { - if (shift >= list.size()) { - return; - } - - const auto begin_range = list.begin(); - const auto end_range = std::next(begin_range, shift); - list.splice(list.end(), list, begin_range, end_range); - } - - static void ListSplice(std::list<T>& in_list, const_list_iterator position, - std::list<T>& out_list, const_list_iterator element) { - in_list.splice(position, out_list, element); - } - - [[nodiscard]] static const_list_iterator ListIterateTo(const std::list<T>& list, - const T& element) { - auto it = list.cbegin(); - while (it != list.cend() && *it != element) { - ++it; - } - return it; - } - - std::array<std::list<T>, Depth> levels; - u64 used_priorities = 0; -}; - -} // namespace Common diff --git a/src/common/scope_exit.h b/src/common/scope_exit.h index 68ef5f197..fa46cb394 100644 --- a/src/common/scope_exit.h +++ b/src/common/scope_exit.h @@ -10,7 +10,7 @@ namespace detail { template <typename Func> 
struct ScopeExitHelper { - explicit ScopeExitHelper(Func&& func) : func(std::move(func)) {} + explicit ScopeExitHelper(Func&& func_) : func(std::move(func_)) {} ~ScopeExitHelper() { if (active) { func(); diff --git a/src/common/telemetry.h b/src/common/telemetry.h index a50c5d1de..49186e848 100644 --- a/src/common/telemetry.h +++ b/src/common/telemetry.h @@ -52,8 +52,8 @@ public: template <typename T> class Field : public FieldInterface { public: - Field(FieldType type, std::string name, T value) - : name(std::move(name)), type(type), value(std::move(value)) {} + Field(FieldType type_, std::string name_, T value_) + : name(std::move(name_)), type(type_), value(std::move(value_)) {} Field(const Field&) = default; Field& operator=(const Field&) = default; diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp new file mode 100644 index 000000000..8f9bf447a --- /dev/null +++ b/src/common/thread_worker.cpp @@ -0,0 +1,58 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/thread.h" +#include "common/thread_worker.h" + +namespace Common { + +ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) { + for (std::size_t i = 0; i < num_workers; ++i) + threads.emplace_back([this, thread_name{std::string{name}}] { + Common::SetCurrentThreadName(thread_name.c_str()); + + // Wait for first request + { + std::unique_lock lock{queue_mutex}; + condition.wait(lock, [this] { return stop || !requests.empty(); }); + } + + while (true) { + std::function<void()> task; + + { + std::unique_lock lock{queue_mutex}; + condition.wait(lock, [this] { return stop || !requests.empty(); }); + if (stop || requests.empty()) { + return; + } + task = std::move(requests.front()); + requests.pop(); + } + + task(); + } + }); +} + +ThreadWorker::~ThreadWorker() { + { + std::unique_lock lock{queue_mutex}; + stop = true; + } + condition.notify_all(); + for (std::thread& thread : threads) { + thread.join(); + } +} + +void ThreadWorker::QueueWork(std::function<void()>&& work) { + { + std::unique_lock lock{queue_mutex}; + requests.emplace(work); + } + condition.notify_one(); +} + +} // namespace Common diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h new file mode 100644 index 000000000..f1859971f --- /dev/null +++ b/src/common/thread_worker.h @@ -0,0 +1,30 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <atomic> +#include <functional> +#include <mutex> +#include <string> +#include <vector> +#include <queue> + +namespace Common { + +class ThreadWorker final { +public: + explicit ThreadWorker(std::size_t num_workers, const std::string& name); + ~ThreadWorker(); + void QueueWork(std::function<void()>&& work); + +private: + std::vector<std::thread> threads; + std::queue<std::function<void()>> requests; + std::mutex queue_mutex; + std::condition_variable condition; + std::atomic_bool stop{}; +}; + +} // namespace Common diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 26e4bfda5..c2c9b6134 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,25 +11,25 @@ namespace Common::X64 { -constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) { +constexpr size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15."); - return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); + return static_cast<size_t>(reg.getIdx()) + (reg.getKind() == Kind::REG ? 
0 : 16); } -constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { +constexpr Xbyak::Reg64 IndexToReg64(size_t reg_index) { ASSERT(reg_index < 16); return Xbyak::Reg64(static_cast<int>(reg_index)); } -constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) { +constexpr Xbyak::Xmm IndexToXmm(size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); return Xbyak::Xmm(static_cast<int>(reg_index - 16)); } -constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) { +constexpr Xbyak::Reg IndexToReg(size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -182,7 +182,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b size_t rsp_alignment, size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (std::size_t i = 0; i < regs.size(); ++i) { + for (size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { code.push(IndexToReg64(i)); } @@ -192,7 +192,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b code.sub(code.rsp, frame_info.subtraction); } - for (std::size_t i = 0; i < regs.size(); ++i) { + for (size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); frame_info.xmm_offset += 0x10; @@ -206,7 +206,7 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits size_t rsp_alignment, size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (std::size_t i = 0; i < regs.size(); ++i) { + for (size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); frame_info.xmm_offset += 0x10; @@ -218,8 +218,8 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits } // GPRs need to be popped in reverse order - for 
(std::size_t j = 0; j < regs.size(); ++j) { - const std::size_t i = regs.size() - j - 1; + for (size_t j = 0; j < regs.size(); ++j) { + const size_t i = regs.size() - j - 1; if (regs[i] && ABI_ALL_GPRS[i]) { code.pop(IndexToReg64(i)); } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 66de33799..01f3e9419 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(core STATIC file_sys/bis_factory.h file_sys/card_image.cpp file_sys/card_image.h + file_sys/common_funcs.h file_sys/content_archive.cpp file_sys/content_archive.h file_sys/control_metadata.cpp @@ -134,6 +135,8 @@ add_library(core STATIC frontend/emu_window.h frontend/framebuffer_layout.cpp frontend/framebuffer_layout.h + frontend/input_interpreter.cpp + frontend/input_interpreter.h frontend/input.h hardware_interrupt_manager.cpp hardware_interrupt_manager.h @@ -148,10 +151,19 @@ add_library(core STATIC hle/kernel/code_set.cpp hle/kernel/code_set.h hle/kernel/errors.h + hle/kernel/global_scheduler_context.cpp + hle/kernel/global_scheduler_context.h hle/kernel/handle_table.cpp hle/kernel/handle_table.h hle/kernel/hle_ipc.cpp hle/kernel/hle_ipc.h + hle/kernel/k_affinity_mask.h + hle/kernel/k_priority_queue.h + hle/kernel/k_scheduler.cpp + hle/kernel/k_scheduler.h + hle/kernel/k_scheduler_lock.h + hle/kernel/k_scoped_lock.h + hle/kernel/k_scoped_scheduler_lock_and_sleep.h hle/kernel/kernel.cpp hle/kernel/kernel.h hle/kernel/memory/address_space_info.cpp @@ -186,12 +198,12 @@ add_library(core STATIC hle/kernel/readable_event.h hle/kernel/resource_limit.cpp hle/kernel/resource_limit.h - hle/kernel/scheduler.cpp - hle/kernel/scheduler.h hle/kernel/server_port.cpp hle/kernel/server_port.h hle/kernel/server_session.cpp hle/kernel/server_session.h + hle/kernel/service_thread.cpp + hle/kernel/service_thread.h hle/kernel/session.cpp hle/kernel/session.h hle/kernel/shared_memory.cpp @@ -490,7 +502,6 @@ add_library(core STATIC hle/service/sm/controller.h 
hle/service/sm/sm.cpp hle/service/sm/sm.h - hle/service/sockets/blocking_worker.h hle/service/sockets/bsd.cpp hle/service/sockets/bsd.h hle/service/sockets/ethc.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 193fd7d62..e9c74b1a6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -294,6 +294,9 @@ void ARM_Dynarmic_32::InvalidateCacheRange(VAddr addr, std::size_t size) { } void ARM_Dynarmic_32::ClearExclusiveState() { + if (!jit) { + return; + } jit->ClearExclusiveState(); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 0f0585d0f..7a4eb88a2 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -15,8 +15,8 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hardware_properties.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" #include "core/memory.h" #include "core/settings.h" @@ -330,6 +330,9 @@ void ARM_Dynarmic_64::InvalidateCacheRange(VAddr addr, std::size_t size) { } void ARM_Dynarmic_64::ClearExclusiveState() { + if (!jit) { + return; + } jit->ClearExclusiveState(); } diff --git a/src/core/core.cpp b/src/core/core.cpp index 01e4faac8..1a2002dec 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -27,10 +27,10 @@ #include "core/file_sys/vfs_real.h" #include "core/hardware_interrupt_manager.h" #include "core/hle/kernel/client_port.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/service/am/applets/applets.h" #include "core/hle/service/apm/controller.h" @@ -159,7 +159,7 @@ struct System::Impl { device_memory = 
std::make_unique<Core::DeviceMemory>(); is_multicore = Settings::values.use_multi_core.GetValue(); - is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue(); + is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue(); kernel.SetMulticore(is_multicore); cpu_manager.SetMulticore(is_multicore); @@ -237,7 +237,7 @@ struct System::Impl { Kernel::Process::Create(system, "main", Kernel::Process::ProcessType::Userland); const auto [load_result, load_parameters] = app_loader->Load(*main_process, system); if (load_result != Loader::ResultStatus::Success) { - LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); + LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", load_result); Shutdown(); return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + @@ -267,8 +267,7 @@ struct System::Impl { u64 title_id{0}; if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) { - LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", - static_cast<u32>(load_result)); + LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", load_result); } perf_stats = std::make_unique<PerfStats>(title_id); // Reset counters and set time origin to current frame @@ -308,7 +307,6 @@ struct System::Impl { service_manager.reset(); cheat_engine.reset(); telemetry_session.reset(); - device_memory.reset(); // Close all CPU/threading state cpu_manager.Shutdown(); @@ -508,14 +506,6 @@ std::size_t System::CurrentCoreIndex() const { return core; } -Kernel::Scheduler& System::CurrentScheduler() { - return impl->kernel.CurrentScheduler(); -} - -const Kernel::Scheduler& System::CurrentScheduler() const { - return impl->kernel.CurrentScheduler(); -} - Kernel::PhysicalCore& System::CurrentPhysicalCore() { return impl->kernel.CurrentPhysicalCore(); } @@ -524,22 +514,14 @@ const Kernel::PhysicalCore& System::CurrentPhysicalCore() const { return impl->kernel.CurrentPhysicalCore(); } 
-Kernel::Scheduler& System::Scheduler(std::size_t core_index) { - return impl->kernel.Scheduler(core_index); -} - -const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { - return impl->kernel.Scheduler(core_index); -} - /// Gets the global scheduler -Kernel::GlobalScheduler& System::GlobalScheduler() { - return impl->kernel.GlobalScheduler(); +Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() { + return impl->kernel.GlobalSchedulerContext(); } /// Gets the global scheduler -const Kernel::GlobalScheduler& System::GlobalScheduler() const { - return impl->kernel.GlobalScheduler(); +const Kernel::GlobalSchedulerContext& System::GlobalSchedulerContext() const { + return impl->kernel.GlobalSchedulerContext(); } Kernel::Process* System::CurrentProcess() { diff --git a/src/core/core.h b/src/core/core.h index 29b8fb92a..579a774e4 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -26,11 +26,11 @@ class VfsFilesystem; } // namespace FileSys namespace Kernel { -class GlobalScheduler; +class GlobalSchedulerContext; class KernelCore; class PhysicalCore; class Process; -class Scheduler; +class KScheduler; } // namespace Kernel namespace Loader { @@ -213,12 +213,6 @@ public: /// Gets the index of the currently running CPU core [[nodiscard]] std::size_t CurrentCoreIndex() const; - /// Gets the scheduler for the CPU core that is currently running - [[nodiscard]] Kernel::Scheduler& CurrentScheduler(); - - /// Gets the scheduler for the CPU core that is currently running - [[nodiscard]] const Kernel::Scheduler& CurrentScheduler() const; - /// Gets the physical core for the CPU core that is currently running [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore(); @@ -261,17 +255,11 @@ public: /// Gets an immutable reference to the renderer. 
[[nodiscard]] const VideoCore::RendererBase& Renderer() const; - /// Gets the scheduler for the CPU core with the specified index - [[nodiscard]] Kernel::Scheduler& Scheduler(std::size_t core_index); - - /// Gets the scheduler for the CPU core with the specified index - [[nodiscard]] const Kernel::Scheduler& Scheduler(std::size_t core_index) const; - /// Gets the global scheduler - [[nodiscard]] Kernel::GlobalScheduler& GlobalScheduler(); + [[nodiscard]] Kernel::GlobalSchedulerContext& GlobalSchedulerContext(); /// Gets the global scheduler - [[nodiscard]] const Kernel::GlobalScheduler& GlobalScheduler() const; + [[nodiscard]] const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const; /// Gets the manager for the guest device memory [[nodiscard]] Core::DeviceMemory& DeviceMemory(); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 0cff985e9..373395047 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -10,9 +10,9 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/cpu_manager.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_core.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "video_core/gpu.h" @@ -109,11 +109,8 @@ void* CpuManager::GetStartFuncParamater() { void CpuManager::MultiCoreRunGuestThread() { auto& kernel = system.Kernel(); - { - auto& sched = kernel.CurrentScheduler(); - sched.OnThreadStart(); - } - auto* thread = kernel.CurrentScheduler().GetCurrentThread(); + kernel.CurrentScheduler()->OnThreadStart(); + auto* thread = kernel.CurrentScheduler()->GetCurrentThread(); auto& host_context = thread->GetHostContext(); host_context->SetRewindPoint(GuestRewindFunction, this); MultiCoreRunGuestLoop(); @@ -130,8 +127,8 @@ void CpuManager::MultiCoreRunGuestLoop() { physical_core = &kernel.CurrentPhysicalCore(); } system.ExitDynarmicProfile(); - auto& scheduler = kernel.CurrentScheduler(); 
- scheduler.TryDoContextSwitch(); + physical_core->ArmInterface().ClearExclusiveState(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } @@ -140,25 +137,21 @@ void CpuManager::MultiCoreRunIdleThread() { while (true) { auto& physical_core = kernel.CurrentPhysicalCore(); physical_core.Idle(); - auto& scheduler = kernel.CurrentScheduler(); - scheduler.TryDoContextSwitch(); + kernel.CurrentScheduler()->RescheduleCurrentCore(); } } void CpuManager::MultiCoreRunSuspendThread() { auto& kernel = system.Kernel(); - { - auto& sched = kernel.CurrentScheduler(); - sched.OnThreadStart(); - } + kernel.CurrentScheduler()->OnThreadStart(); while (true) { auto core = kernel.GetCurrentHostThreadID(); - auto& scheduler = kernel.CurrentScheduler(); + auto& scheduler = *kernel.CurrentScheduler(); Kernel::Thread* current_thread = scheduler.GetCurrentThread(); Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context); ASSERT(scheduler.ContextSwitchPending()); ASSERT(core == kernel.GetCurrentHostThreadID()); - scheduler.TryDoContextSwitch(); + scheduler.RescheduleCurrentCore(); } } @@ -206,11 +199,8 @@ void CpuManager::MultiCorePause(bool paused) { void CpuManager::SingleCoreRunGuestThread() { auto& kernel = system.Kernel(); - { - auto& sched = kernel.CurrentScheduler(); - sched.OnThreadStart(); - } - auto* thread = kernel.CurrentScheduler().GetCurrentThread(); + kernel.CurrentScheduler()->OnThreadStart(); + auto* thread = kernel.CurrentScheduler()->GetCurrentThread(); auto& host_context = thread->GetHostContext(); host_context->SetRewindPoint(GuestRewindFunction, this); SingleCoreRunGuestLoop(); @@ -218,7 +208,7 @@ void CpuManager::SingleCoreRunGuestThread() { void CpuManager::SingleCoreRunGuestLoop() { auto& kernel = system.Kernel(); - auto* thread = kernel.CurrentScheduler().GetCurrentThread(); + auto* thread = kernel.CurrentScheduler()->GetCurrentThread(); while (true) { auto* physical_core = &kernel.CurrentPhysicalCore(); 
system.EnterDynarmicProfile(); @@ -230,9 +220,10 @@ void CpuManager::SingleCoreRunGuestLoop() { thread->SetPhantomMode(true); system.CoreTiming().Advance(); thread->SetPhantomMode(false); + physical_core->ArmInterface().ClearExclusiveState(); PreemptSingleCore(); auto& scheduler = kernel.Scheduler(current_core); - scheduler.TryDoContextSwitch(); + scheduler.RescheduleCurrentCore(); } } @@ -244,51 +235,53 @@ void CpuManager::SingleCoreRunIdleThread() { system.CoreTiming().AddTicks(1000U); idle_count++; auto& scheduler = physical_core.Scheduler(); - scheduler.TryDoContextSwitch(); + scheduler.RescheduleCurrentCore(); } } void CpuManager::SingleCoreRunSuspendThread() { auto& kernel = system.Kernel(); - { - auto& sched = kernel.CurrentScheduler(); - sched.OnThreadStart(); - } + kernel.CurrentScheduler()->OnThreadStart(); while (true) { auto core = kernel.GetCurrentHostThreadID(); - auto& scheduler = kernel.CurrentScheduler(); + auto& scheduler = *kernel.CurrentScheduler(); Kernel::Thread* current_thread = scheduler.GetCurrentThread(); Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context); ASSERT(scheduler.ContextSwitchPending()); ASSERT(core == kernel.GetCurrentHostThreadID()); - scheduler.TryDoContextSwitch(); + scheduler.RescheduleCurrentCore(); } } void CpuManager::PreemptSingleCore(bool from_running_enviroment) { - std::size_t old_core = current_core; - auto& scheduler = system.Kernel().Scheduler(old_core); - Kernel::Thread* current_thread = scheduler.GetCurrentThread(); - if (idle_count >= 4 || from_running_enviroment) { - if (!from_running_enviroment) { - system.CoreTiming().Idle(); - idle_count = 0; + { + auto& scheduler = system.Kernel().Scheduler(current_core); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + if (idle_count >= 4 || from_running_enviroment) { + if (!from_running_enviroment) { + system.CoreTiming().Idle(); + idle_count = 0; + } + current_thread->SetPhantomMode(true); + 
system.CoreTiming().Advance(); + current_thread->SetPhantomMode(false); } - current_thread->SetPhantomMode(true); - system.CoreTiming().Advance(); - current_thread->SetPhantomMode(false); + current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES); + system.CoreTiming().ResetTicks(); + scheduler.Unload(scheduler.GetCurrentThread()); + + auto& next_scheduler = system.Kernel().Scheduler(current_core); + Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext()); } - current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES); - system.CoreTiming().ResetTicks(); - scheduler.Unload(); - auto& next_scheduler = system.Kernel().Scheduler(current_core); - Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext()); - /// May have changed scheduler - auto& current_scheduler = system.Kernel().Scheduler(current_core); - current_scheduler.Reload(); - auto* currrent_thread2 = current_scheduler.GetCurrentThread(); - if (!currrent_thread2->IsIdleThread()) { - idle_count = 0; + + // May have changed scheduler + { + auto& scheduler = system.Kernel().Scheduler(current_core); + scheduler.Reload(scheduler.GetCurrentThread()); + auto* currrent_thread2 = scheduler.GetCurrentThread(); + if (!currrent_thread2->IsIdleThread()) { + idle_count = 0; + } } } @@ -369,8 +362,7 @@ void CpuManager::RunThread(std::size_t core) { return; } - auto& scheduler = system.Kernel().CurrentScheduler(); - Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); data.is_running = true; Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext()); data.is_running = false; diff --git a/src/core/file_sys/common_funcs.h b/src/core/file_sys/common_funcs.h new file mode 100644 index 000000000..7ed97aa50 --- /dev/null +++ b/src/core/file_sys/common_funcs.h @@ -0,0 +1,56 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under 
GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace FileSys { + +constexpr u64 AOC_TITLE_ID_MASK = 0x7FF; +constexpr u64 AOC_TITLE_ID_OFFSET = 0x1000; +constexpr u64 BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; + +/** + * Gets the base title ID from a given title ID. + * + * @param title_id The title ID. + * @returns The base title ID. + */ +[[nodiscard]] constexpr u64 GetBaseTitleID(u64 title_id) { + return title_id & BASE_TITLE_ID_MASK; +} + +/** + * Gets the base title ID with a program index offset from a given title ID. + * + * @param title_id The title ID. + * @param program_index The program index. + * @returns The base title ID with a program index offset. + */ +[[nodiscard]] constexpr u64 GetBaseTitleIDWithProgramIndex(u64 title_id, u64 program_index) { + return GetBaseTitleID(title_id) + program_index; +} + +/** + * Gets the AOC (Add-On Content) base title ID from a given title ID. + * + * @param title_id The title ID. + * @returns The AOC base title ID. + */ +[[nodiscard]] constexpr u64 GetAOCBaseTitleID(u64 title_id) { + return GetBaseTitleID(title_id) + AOC_TITLE_ID_OFFSET; +} + +/** + * Gets the AOC (Add-On Content) ID from a given AOC title ID. + * + * @param aoc_title_id The AOC title ID. + * @returns The AOC ID. 
+ */ +[[nodiscard]] constexpr u64 GetAOCID(u64 aoc_title_id) { + return aoc_title_id & AOC_TITLE_ID_MASK; +} + +} // namespace FileSys diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp index 76af47ff9..a6c0337fa 100644 --- a/src/core/file_sys/content_archive.cpp +++ b/src/core/file_sys/content_archive.cpp @@ -410,8 +410,9 @@ u8 NCA::GetCryptoRevision() const { std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const { const auto master_key_id = GetCryptoRevision(); - if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index)) - return {}; + if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index)) { + return std::nullopt; + } std::vector<u8> key_area(header.key_area.begin(), header.key_area.end()); Core::Crypto::AESCipher<Core::Crypto::Key128> cipher( @@ -420,15 +421,17 @@ std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type cipher.Transcode(key_area.data(), key_area.size(), key_area.data(), Core::Crypto::Op::Decrypt); Core::Crypto::Key128 out; - if (type == NCASectionCryptoType::XTS) + if (type == NCASectionCryptoType::XTS) { std::copy(key_area.begin(), key_area.begin() + 0x10, out.begin()); - else if (type == NCASectionCryptoType::CTR || type == NCASectionCryptoType::BKTR) + } else if (type == NCASectionCryptoType::CTR || type == NCASectionCryptoType::BKTR) { std::copy(key_area.begin() + 0x20, key_area.begin() + 0x30, out.begin()); - else + } else { LOG_CRITICAL(Crypto, "Called GetKeyAreaKey on invalid NCASectionCryptoType type={:02X}", - static_cast<u8>(type)); + type); + } + u128 out_128{}; - memcpy(out_128.data(), out.data(), 16); + std::memcpy(out_128.data(), out.data(), sizeof(u128)); LOG_TRACE(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}", master_key_id, header.key_index, out_128[1], out_128[0]); @@ -507,7 +510,7 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& 
s_header, VirtualFile in, u64 s // TODO(DarkLordZach): Find a test case for XTS-encrypted NCAs default: LOG_ERROR(Crypto, "called with unhandled crypto type={:02X}", - static_cast<u8>(s_header.raw.header.crypto_type)); + s_header.raw.header.crypto_type); return nullptr; } } @@ -516,15 +519,17 @@ Loader::ResultStatus NCA::GetStatus() const { return status; } -std::vector<std::shared_ptr<VfsFile>> NCA::GetFiles() const { - if (status != Loader::ResultStatus::Success) +std::vector<VirtualFile> NCA::GetFiles() const { + if (status != Loader::ResultStatus::Success) { return {}; + } return files; } -std::vector<std::shared_ptr<VfsDirectory>> NCA::GetSubdirectories() const { - if (status != Loader::ResultStatus::Success) +std::vector<VirtualDir> NCA::GetSubdirectories() const { + if (status != Loader::ResultStatus::Success) { return {}; + } return dirs; } @@ -532,7 +537,7 @@ std::string NCA::GetName() const { return file->GetName(); } -std::shared_ptr<VfsDirectory> NCA::GetParentDirectory() const { +VirtualDir NCA::GetParentDirectory() const { return file->GetContainingDirectory(); } diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h index 69292232a..e9eccdea3 100644 --- a/src/core/file_sys/content_archive.h +++ b/src/core/file_sys/content_archive.h @@ -82,7 +82,7 @@ struct NCAHeader { }; static_assert(sizeof(NCAHeader) == 0x400, "NCAHeader has incorrect size."); -inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) { +inline bool IsDirectoryExeFS(const VirtualDir& pfs) { // According to switchbrew, an exefs must only contain these two files: return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr; } @@ -104,10 +104,10 @@ public: Loader::ResultStatus GetStatus() const; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualFile> GetFiles() const override; + std::vector<VirtualDir> 
GetSubdirectories() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; + VirtualDir GetParentDirectory() const override; NCAContentType GetType() const; u64 GetTitleId() const; diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp index 5990a2fd5..adcf0732f 100644 --- a/src/core/file_sys/nca_patch.cpp +++ b/src/core/file_sys/nca_patch.cpp @@ -191,7 +191,7 @@ bool BKTR::Resize(std::size_t new_size) { return false; } -std::shared_ptr<VfsDirectory> BKTR::GetContainingDirectory() const { +VirtualDir BKTR::GetContainingDirectory() const { return base_romfs->GetContainingDirectory(); } diff --git a/src/core/file_sys/nca_patch.h b/src/core/file_sys/nca_patch.h index 60c544f8e..503cf473e 100644 --- a/src/core/file_sys/nca_patch.h +++ b/src/core/file_sys/nca_patch.h @@ -106,7 +106,7 @@ public: bool Resize(std::size_t new_size) override; - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; + VirtualDir GetContainingDirectory() const override; bool IsWritable() const override; diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index e9d1607d0..7c3284df8 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -12,6 +12,7 @@ #include "common/logging/log.h" #include "common/string_util.h" #include "core/core.h" +#include "core/file_sys/common_funcs.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/ips_layer.h" @@ -30,7 +31,6 @@ namespace FileSys { namespace { constexpr u32 SINGLE_BYTE_MODULUS = 0x100; -constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{ "main", "main.npdm", "rtld", "sdk", "subsdk0", "subsdk1", "subsdk2", @@ -532,7 +532,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u dlc_match.reserve(dlc_entries.size()); 
std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), [this](const ContentProviderEntry& entry) { - return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id && + return GetBaseTitleID(entry.title_id) == title_id && content_provider.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; }); diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp index 987199747..f4e16e4be 100644 --- a/src/core/file_sys/romfs_factory.cpp +++ b/src/core/file_sys/romfs_factory.cpp @@ -7,6 +7,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/file_sys/card_image.h" +#include "core/file_sys/common_funcs.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" #include "core/file_sys/patch_manager.h" @@ -47,6 +48,27 @@ ResultVal<VirtualFile> RomFSFactory::OpenCurrentProcess(u64 current_process_titl patch_manager.PatchRomFS(file, ivfc_offset, ContentRecordType::Program, update_raw)); } +ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFS(u64 title_id, ContentRecordType type) const { + auto nca = content_provider.GetEntry(title_id, type); + + if (nca == nullptr) { + // TODO: Find the right error code to use here + return RESULT_UNKNOWN; + } + + const PatchManager patch_manager{title_id, filesystem_controller, content_provider}; + + return MakeResult<VirtualFile>( + patch_manager.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), type)); +} + +ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFSWithProgramIndex( + u64 title_id, u8 program_index, ContentRecordType type) const { + const auto res_title_id = GetBaseTitleIDWithProgramIndex(title_id, program_index); + + return OpenPatchedRomFS(res_title_id, type); +} + ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, ContentRecordType type) const { const std::shared_ptr<NCA> res = GetEntry(title_id, storage, type); diff --git a/src/core/file_sys/romfs_factory.h 
b/src/core/file_sys/romfs_factory.h index ec704dfa8..96dd0d578 100644 --- a/src/core/file_sys/romfs_factory.h +++ b/src/core/file_sys/romfs_factory.h @@ -42,6 +42,10 @@ public: void SetPackedUpdate(VirtualFile update_raw); [[nodiscard]] ResultVal<VirtualFile> OpenCurrentProcess(u64 current_process_title_id) const; + [[nodiscard]] ResultVal<VirtualFile> OpenPatchedRomFS(u64 title_id, + ContentRecordType type) const; + [[nodiscard]] ResultVal<VirtualFile> OpenPatchedRomFSWithProgramIndex( + u64 title_id, u8 program_index, ContentRecordType type) const; [[nodiscard]] ResultVal<VirtualFile> Open(u64 title_id, StorageId storage, ContentRecordType type) const; diff --git a/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp b/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp index 69d62ce8f..29ef110a6 100644 --- a/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp +++ b/src/core/file_sys/system_archive/data/font_nintendo_extended.cpp @@ -6,191 +6,384 @@ namespace FileSys::SystemArchive::SharedFontData { -const std::array<unsigned char, 2932> FONT_NINTENDO_EXTENDED{{ - 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x80, 0x00, 0x03, 0x00, 0x70, 0x44, 0x53, 0x49, 0x47, - 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0b, 0x6c, 0x00, 0x00, 0x00, 0x08, 0x4f, 0x53, 0x2f, 0x32, - 0x33, 0x86, 0x1d, 0x9b, 0x00, 0x00, 0x01, 0x78, 0x00, 0x00, 0x00, 0x60, 0x63, 0x6d, 0x61, 0x70, - 0xc2, 0x06, 0x20, 0xde, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x63, 0x76, 0x74, 0x20, - 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x04, 0x2c, 0x00, 0x00, 0x00, 0x06, 0x66, 0x70, 0x67, 0x6d, - 0x06, 0x59, 0x9c, 0x37, 0x00, 0x00, 0x02, 0xa0, 0x00, 0x00, 0x01, 0x73, 0x67, 0x61, 0x73, 0x70, - 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x0b, 0x64, 0x00, 0x00, 0x00, 0x08, 0x67, 0x6c, 0x79, 0x66, - 0x10, 0x31, 0x88, 0x00, 0x00, 0x00, 0x04, 0x34, 0x00, 0x00, 0x04, 0x64, 0x68, 0x65, 0x61, 0x64, - 0x15, 0x9d, 0xef, 0x91, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x36, 0x68, 0x68, 0x65, 
0x61, - 0x09, 0x60, 0x03, 0x71, 0x00, 0x00, 0x01, 0x34, 0x00, 0x00, 0x00, 0x24, 0x68, 0x6d, 0x74, 0x78, - 0x0d, 0x2e, 0x03, 0xa7, 0x00, 0x00, 0x01, 0xd8, 0x00, 0x00, 0x00, 0x26, 0x6c, 0x6f, 0x63, 0x61, - 0x05, 0xc0, 0x04, 0x6c, 0x00, 0x00, 0x08, 0x98, 0x00, 0x00, 0x00, 0x1e, 0x6d, 0x61, 0x78, 0x70, - 0x02, 0x1c, 0x00, 0x5f, 0x00, 0x00, 0x01, 0x58, 0x00, 0x00, 0x00, 0x20, 0x6e, 0x61, 0x6d, 0x65, - 0x7c, 0xe0, 0x84, 0x5c, 0x00, 0x00, 0x08, 0xb8, 0x00, 0x00, 0x02, 0x09, 0x70, 0x6f, 0x73, 0x74, - 0x47, 0x4e, 0x74, 0x19, 0x00, 0x00, 0x0a, 0xc4, 0x00, 0x00, 0x00, 0x9e, 0x70, 0x72, 0x65, 0x70, - 0x1c, 0xfc, 0x7d, 0x9c, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00, 0x16, 0x00, 0x01, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x00, 0x7c, 0xc7, 0xb1, 0x63, 0x5f, 0x0f, 0x3c, 0xf5, 0x00, 0x1b, 0x03, 0xe8, - 0x00, 0x00, 0x00, 0x00, 0xd9, 0x44, 0x2f, 0x5d, 0x00, 0x00, 0x00, 0x00, 0xd9, 0x45, 0x7b, 0x69, - 0x00, 0x00, 0x00, 0x00, 0x03, 0xe6, 0x03, 0xe8, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x84, 0xff, 0x83, 0x01, 0xf4, 0x03, 0xe8, - 0x00, 0x00, 0x00, 0x00, 0x03, 0xe6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x5e, - 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, 0x74, 0x01, 0x90, 0x00, 0x05, - 0x00, 0x04, 0x00, 0xcd, 0x00, 0xcd, 0x00, 0x00, 0x01, 0x1f, 0x00, 0xcd, 0x00, 0xcd, 0x00, 0x00, - 0x03, 0xc3, 0x00, 0x66, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +const std::array<unsigned char, 6024> FONT_NINTENDO_EXTENDED{{ + 0x00, 0x01, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x80, 0x00, 0x03, 0x00, 0x60, 0x4F, 0x53, 0x2F, 0x32, + 0x34, 0x00, 0x1E, 0x26, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60, 0x63, 0x6D, 0x61, 0x70, + 0xC1, 0xE7, 0xC8, 0xF3, 0x00, 0x00, 0x02, 0x0C, 0x00, 0x00, 0x01, 
0x72, 0x63, 0x76, 0x74, 0x20, + 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x05, 0x0C, 0x00, 0x00, 0x00, 0x06, 0x66, 0x70, 0x67, 0x6D, + 0x06, 0x59, 0x9C, 0x37, 0x00, 0x00, 0x03, 0x80, 0x00, 0x00, 0x01, 0x73, 0x67, 0x61, 0x73, 0x70, + 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x17, 0x80, 0x00, 0x00, 0x00, 0x08, 0x67, 0x6C, 0x79, 0x66, + 0x50, 0x0B, 0xEA, 0xFA, 0x00, 0x00, 0x05, 0x50, 0x00, 0x00, 0x0F, 0x04, 0x68, 0x65, 0x61, 0x64, + 0x18, 0x65, 0x81, 0x09, 0x00, 0x00, 0x00, 0xEC, 0x00, 0x00, 0x00, 0x36, 0x68, 0x68, 0x65, 0x61, + 0x09, 0x88, 0x03, 0x86, 0x00, 0x00, 0x01, 0x24, 0x00, 0x00, 0x00, 0x24, 0x68, 0x6D, 0x74, 0x78, + 0x0A, 0xF0, 0x01, 0x94, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x00, 0x42, 0x6C, 0x6F, 0x63, 0x61, + 0x34, 0x80, 0x30, 0x6E, 0x00, 0x00, 0x05, 0x14, 0x00, 0x00, 0x00, 0x3A, 0x6D, 0x61, 0x78, 0x70, + 0x02, 0x2C, 0x00, 0x72, 0x00, 0x00, 0x01, 0x48, 0x00, 0x00, 0x00, 0x20, 0x6E, 0x61, 0x6D, 0x65, + 0xDB, 0xC5, 0x42, 0x4D, 0x00, 0x00, 0x14, 0x54, 0x00, 0x00, 0x01, 0xFE, 0x70, 0x6F, 0x73, 0x74, + 0xF4, 0xB4, 0xAC, 0xAB, 0x00, 0x00, 0x16, 0x54, 0x00, 0x00, 0x01, 0x2A, 0x70, 0x72, 0x65, 0x70, + 0x1C, 0xFC, 0x7D, 0x9C, 0x00, 0x00, 0x04, 0xF4, 0x00, 0x00, 0x00, 0x16, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0xC9, 0x16, 0x5B, 0x71, 0x5F, 0x0F, 0x3C, 0xF5, 0x00, 0x0B, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xD9, 0x44, 0x2F, 0x5D, 0x00, 0x00, 0x00, 0x00, 0xDC, 0x02, 0x0D, 0xA7, + 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x00, 0x00, 0x08, 0x00, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x9A, 0xFF, 0x80, 0x02, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0xEC, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x71, + 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, 0xC4, 0x01, 0x90, 0x00, 0x05, + 0x00, 
0x04, 0x00, 0xD2, 0x00, 0xD2, 0x00, 0x00, 0x01, 0x26, 0x00, 0xD2, 0x00, 0xD2, 0x00, 0x00, + 0x03, 0xDA, 0x00, 0x68, 0x02, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x00, 0xc0, 0x00, 0x00, 0xe0, 0xe9, 0x03, 0x84, 0xff, 0x83, - 0x01, 0xf4, 0x02, 0xee, 0x00, 0xfa, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe8, - 0x02, 0xbc, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x03, 0xe8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xfa, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x00, 0x03, 0xe8, 0x00, 0xeb, 0x01, 0x21, 0x00, 0xff, - 0x00, 0xff, 0x01, 0x3d, 0x01, 0x17, 0x00, 0x42, 0x00, 0x1c, 0x00, 0x3e, 0x00, 0x17, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x68, 0x00, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x1c, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x68, 0x00, 0x06, 0x00, 0x4c, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x00, 0xC0, 0x00, 0x0D, 0xE0, 0xF0, 0x03, 0x9A, 0xFF, 0x80, + 0x02, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x02, 0xCD, 0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0x04, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, + 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, + 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, + 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, + 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6C, + 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x50, 0x00, 0x00, 0x00, 0x10, + 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x0D, 0x00, 0x20, 0xE0, 0xA9, 0xE0, 0xB4, + 0xE0, 0xE9, 0xE0, 0xF0, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x20, 0xE0, 0xA0, + 0xE0, 0xB3, 0xE0, 0xE0, 0xE0, 0xEF, 0xFF, 0xFF, 0x00, 0x01, 0xFF, 0xF5, 0xFF, 0xE3, 0x1F, 0x64, + 0x1F, 0x5B, 0x1F, 0x30, 0x1F, 0x2B, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x06, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0a, - 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x20, 0xe0, 0xe9, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x20, 0xe0, 0xe0, 0xff, 0xff, 0x00, 0x01, 0xff, 0xf5, - 0xff, 0xe3, 0x1f, 0x24, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xb8, 0x00, 0x00, 0x2c, 0x4b, 0xb8, 0x00, 0x09, 0x50, 0x58, 0xb1, 0x01, 0x01, 0x8e, 0x59, 0xb8, - 0x01, 0xff, 0x85, 0xb8, 0x00, 0x44, 0x1d, 0xb9, 0x00, 0x09, 0x00, 0x03, 0x5f, 0x5e, 0x2d, 0xb8, - 0x00, 0x01, 0x2c, 0x20, 0x20, 0x45, 0x69, 0x44, 0xb0, 0x01, 0x60, 0x2d, 0xb8, 0x00, 0x02, 0x2c, - 0xb8, 0x00, 0x01, 0x2a, 0x21, 0x2d, 0xb8, 0x00, 0x03, 0x2c, 0x20, 0x46, 0xb0, 0x03, 0x25, 0x46, - 0x52, 0x58, 0x23, 0x59, 0x20, 0x8a, 0x20, 0x8a, 0x49, 0x64, 0x8a, 0x20, 0x46, 0x20, 0x68, 0x61, - 0x64, 0xb0, 0x04, 0x25, 0x46, 0x20, 0x68, 0x61, 0x64, 0x52, 0x58, 0x23, 0x65, 0x8a, 0x59, 0x2f, - 0x20, 0xb0, 0x00, 0x53, 0x58, 0x69, 0x20, 0xb0, 0x00, 0x54, 0x58, 0x21, 0xb0, 0x40, 0x59, 
0x1b, - 0x69, 0x20, 0xb0, 0x00, 0x54, 0x58, 0x21, 0xb0, 0x40, 0x65, 0x59, 0x59, 0x3a, 0x2d, 0xb8, 0x00, - 0x04, 0x2c, 0x20, 0x46, 0xb0, 0x04, 0x25, 0x46, 0x52, 0x58, 0x23, 0x8a, 0x59, 0x20, 0x46, 0x20, - 0x6a, 0x61, 0x64, 0xb0, 0x04, 0x25, 0x46, 0x20, 0x6a, 0x61, 0x64, 0x52, 0x58, 0x23, 0x8a, 0x59, - 0x2f, 0xfd, 0x2d, 0xb8, 0x00, 0x05, 0x2c, 0x4b, 0x20, 0xb0, 0x03, 0x26, 0x50, 0x58, 0x51, 0x58, - 0xb0, 0x80, 0x44, 0x1b, 0xb0, 0x40, 0x44, 0x59, 0x1b, 0x21, 0x21, 0x20, 0x45, 0xb0, 0xc0, 0x50, - 0x58, 0xb0, 0xc0, 0x44, 0x1b, 0x21, 0x59, 0x59, 0x2d, 0xb8, 0x00, 0x06, 0x2c, 0x20, 0x20, 0x45, - 0x69, 0x44, 0xb0, 0x01, 0x60, 0x20, 0x20, 0x45, 0x7d, 0x69, 0x18, 0x44, 0xb0, 0x01, 0x60, 0x2d, - 0xb8, 0x00, 0x07, 0x2c, 0xb8, 0x00, 0x06, 0x2a, 0x2d, 0xb8, 0x00, 0x08, 0x2c, 0x4b, 0x20, 0xb0, - 0x03, 0x26, 0x53, 0x58, 0xb0, 0x40, 0x1b, 0xb0, 0x00, 0x59, 0x8a, 0x8a, 0x20, 0xb0, 0x03, 0x26, - 0x53, 0x58, 0x23, 0x21, 0xb0, 0x80, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0, 0x03, 0x26, - 0x53, 0x58, 0x23, 0x21, 0xb8, 0x00, 0xc0, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0, 0x03, - 0x26, 0x53, 0x58, 0x23, 0x21, 0xb8, 0x01, 0x00, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, 0xb0, - 0x03, 0x26, 0x53, 0x58, 0x23, 0x21, 0xb8, 0x01, 0x40, 0x8a, 0x8a, 0x1b, 0x8a, 0x23, 0x59, 0x20, - 0xb8, 0x00, 0x03, 0x26, 0x53, 0x58, 0xb0, 0x03, 0x25, 0x45, 0xb8, 0x01, 0x80, 0x50, 0x58, 0x23, - 0x21, 0xb8, 0x01, 0x80, 0x23, 0x21, 0x1b, 0xb0, 0x03, 0x25, 0x45, 0x23, 0x21, 0x23, 0x21, 0x59, - 0x1b, 0x21, 0x59, 0x44, 0x2d, 0xb8, 0x00, 0x09, 0x2c, 0x4b, 0x53, 0x58, 0x45, 0x44, 0x1b, 0x21, - 0x21, 0x59, 0x2d, 0x00, 0xb8, 0x00, 0x00, 0x2b, 0x00, 0xba, 0x00, 0x01, 0x00, 0x01, 0x00, 0x07, - 0x2b, 0xb8, 0x00, 0x00, 0x20, 0x45, 0x7d, 0x69, 0x18, 0x44, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x03, 0xe6, 0x03, 0xe8, 0x00, 0x06, - 0x00, 0x00, 0x35, 0x01, 0x33, 0x15, 0x01, 0x23, 0x35, 0x03, 0x52, 0x94, 0xfc, 0xa6, 0x8c, 0x90, - 0x03, 0x58, 0x86, 0xfc, 0xa0, 
0x8e, 0x00, 0x00, 0x00, 0x02, 0x00, 0xeb, 0x00, 0xcc, 0x02, 0xfb, - 0x03, 0x1e, 0x00, 0x08, 0x00, 0x0f, 0x00, 0x00, 0x01, 0x33, 0x13, 0x23, 0x27, 0x23, 0x07, 0x23, - 0x13, 0x17, 0x07, 0x06, 0x15, 0x33, 0x27, 0x07, 0x01, 0xbc, 0x6d, 0xd2, 0x7c, 0x26, 0xcc, 0x26, - 0x7c, 0xd1, 0x35, 0x40, 0x02, 0x89, 0x45, 0x02, 0x03, 0x1e, 0xfd, 0xae, 0x77, 0x77, 0x02, 0x52, - 0x9b, 0xcc, 0x08, 0x04, 0xda, 0x02, 0x00, 0x00, 0x00, 0x03, 0x01, 0x21, 0x00, 0xcc, 0x02, 0xc5, - 0x03, 0x1e, 0x00, 0x15, 0x00, 0x1f, 0x00, 0x2b, 0x00, 0x00, 0x25, 0x11, 0x33, 0x32, 0x1e, 0x02, - 0x15, 0x14, 0x0e, 0x02, 0x07, 0x1e, 0x01, 0x15, 0x14, 0x0e, 0x02, 0x2b, 0x01, 0x13, 0x33, 0x32, - 0x36, 0x35, 0x34, 0x26, 0x2b, 0x01, 0x1d, 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x2b, - 0x01, 0x15, 0x01, 0x21, 0xea, 0x25, 0x3f, 0x2e, 0x1a, 0x0e, 0x15, 0x1b, 0x0e, 0x2d, 0x2d, 0x1a, - 0x2e, 0x3f, 0x25, 0xf8, 0x76, 0x62, 0x20, 0x2a, 0x28, 0x22, 0x62, 0x76, 0x10, 0x18, 0x11, 0x09, - 0x22, 0x22, 0x74, 0xcc, 0x02, 0x52, 0x18, 0x2b, 0x3c, 0x24, 0x1d, 0x1f, 0x17, 0x17, 0x14, 0x0f, - 0x48, 0x2f, 0x24, 0x3f, 0x2e, 0x1a, 0x01, 0x5b, 0x29, 0x20, 0x20, 0x2b, 0x94, 0xf8, 0x0e, 0x16, - 0x1c, 0x0e, 0x1f, 0x31, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xff, 0x00, 0xcc, 0x02, 0xe7, - 0x03, 0x1e, 0x00, 0x0c, 0x00, 0x00, 0x01, 0x33, 0x17, 0x37, 0x33, 0x03, 0x13, 0x23, 0x27, 0x07, - 0x23, 0x13, 0x03, 0x01, 0x04, 0x86, 0x69, 0x69, 0x86, 0xa3, 0xa8, 0x88, 0x6c, 0x6c, 0x88, 0xa8, - 0xa3, 0x03, 0x1e, 0xcb, 0xcb, 0xfe, 0xda, 0xfe, 0xd4, 0xcf, 0xcf, 0x01, 0x2c, 0x01, 0x26, 0x00, - 0x00, 0x01, 0x00, 0xff, 0x00, 0xcc, 0x02, 0xe7, 0x03, 0x1e, 0x00, 0x0f, 0x00, 0x00, 0x01, 0x03, - 0x33, 0x17, 0x32, 0x15, 0x1e, 0x01, 0x15, 0x1b, 0x01, 0x33, 0x03, 0x15, 0x23, 0x35, 0x01, 0xb8, - 0xb9, 0x7e, 0x01, 0x01, 0x01, 0x03, 0x70, 0x75, 0x7f, 0xb9, 0x76, 0x01, 0xa3, 0x01, 0x7b, 0x01, - 0x01, 0x01, 0x05, 0x02, 0xff, 0x00, 0x01, 0x0a, 0xfe, 0x85, 0xd7, 0xd7, 0x00, 0x01, 0x01, 0x3d, - 0x00, 0xcc, 0x02, 0xa9, 0x03, 0x1e, 0x00, 0x06, 0x00, 0x00, 0x25, 
0x11, 0x33, 0x11, 0x33, 0x15, - 0x21, 0x01, 0x3d, 0x75, 0xf7, 0xfe, 0x94, 0xcc, 0x02, 0x52, 0xfe, 0x10, 0x62, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x01, 0x17, 0x00, 0xbc, 0x02, 0xcf, 0x03, 0x0e, 0x00, 0x15, 0x00, 0x21, 0x00, 0x00, - 0x25, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x1d, 0x01, 0x0e, 0x03, 0x1d, 0x01, 0x17, 0x15, 0x23, 0x27, - 0x23, 0x15, 0x23, 0x13, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x2b, 0x01, 0x15, 0x01, 0x17, - 0xf4, 0x27, 0x40, 0x2e, 0x19, 0x01, 0x1f, 0x24, 0x1e, 0x78, 0x7d, 0x6a, 0x5c, 0x75, 0x76, 0x72, - 0x12, 0x19, 0x11, 0x08, 0x26, 0x26, 0x6a, 0xbc, 0x02, 0x52, 0x1d, 0x31, 0x42, 0x25, 0x16, 0x18, - 0x32, 0x2a, 0x1b, 0x02, 0x01, 0xef, 0x06, 0xd7, 0xd7, 0x01, 0x3f, 0x10, 0x1a, 0x1e, 0x0f, 0x23, - 0x36, 0xb0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x42, 0x00, 0xbc, 0x03, 0xa4, 0x03, 0x0e, 0x00, 0x0a, - 0x00, 0x11, 0x00, 0x00, 0x13, 0x35, 0x21, 0x15, 0x01, 0x21, 0x15, 0x21, 0x35, 0x01, 0x21, 0x01, - 0x11, 0x33, 0x11, 0x33, 0x15, 0x21, 0x42, 0x01, 0xa7, 0xfe, 0xeb, 0x01, 0x1b, 0xfe, 0x53, 0x01, - 0x15, 0xfe, 0xeb, 0x01, 0xf7, 0x75, 0xf6, 0xfe, 0x95, 0x02, 0xac, 0x62, 0x45, 0xfe, 0x55, 0x62, - 0x47, 0x01, 0xa9, 0xfe, 0x10, 0x02, 0x52, 0xfe, 0x10, 0x62, 0x00, 0x00, 0x00, 0x03, 0x00, 0x1c, - 0x00, 0xbc, 0x03, 0xca, 0x03, 0x0e, 0x00, 0x0a, 0x00, 0x21, 0x00, 0x2f, 0x00, 0x00, 0x13, 0x35, - 0x21, 0x15, 0x01, 0x21, 0x15, 0x21, 0x35, 0x01, 0x21, 0x01, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x15, - 0x14, 0x06, 0x07, 0x0e, 0x03, 0x15, 0x17, 0x15, 0x23, 0x27, 0x23, 0x15, 0x23, 0x13, 0x33, 0x32, - 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x02, 0x2b, 0x01, 0x15, 0x1c, 0x01, 0xa7, 0xfe, 0xeb, 0x01, 0x1b, - 0xfe, 0x53, 0x01, 0x15, 0xfe, 0xeb, 0x01, 0xf7, 0xf3, 0x27, 0x41, 0x2d, 0x19, 0x1c, 0x20, 0x01, - 0x0d, 0x0e, 0x0a, 0x78, 0x7d, 0x69, 0x5c, 0x75, 0x76, 0x71, 0x11, 0x1a, 0x12, 0x09, 0x0a, 0x14, - 0x1d, 0x13, 0x69, 0x02, 0xac, 0x62, 0x45, 0xfe, 0x55, 0x62, 0x47, 0x01, 0xa9, 0xfe, 0x10, 0x02, - 0x52, 0x1d, 0x31, 0x42, 0x25, 0x2b, 0x44, 0x1d, 0x01, 0x08, 0x09, 0x07, 0x01, 0xf1, 0x06, 0xd7, - 0xd7, 
0x01, 0x3f, 0x11, 0x19, 0x1f, 0x0e, 0x11, 0x20, 0x19, 0x0f, 0xb0, 0x00, 0x02, 0x00, 0x3e, - 0x00, 0xb3, 0x03, 0xa8, 0x03, 0x17, 0x00, 0x3a, 0x00, 0x41, 0x00, 0x00, 0x13, 0x34, 0x3e, 0x02, - 0x33, 0x32, 0x1e, 0x02, 0x15, 0x23, 0x27, 0x34, 0x27, 0x2e, 0x01, 0x23, 0x22, 0x0e, 0x02, 0x15, - 0x14, 0x16, 0x15, 0x1e, 0x05, 0x15, 0x14, 0x0e, 0x02, 0x23, 0x22, 0x2e, 0x02, 0x35, 0x33, 0x1e, - 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x04, 0x35, 0x01, 0x11, 0x33, 0x11, 0x33, 0x15, - 0x21, 0x50, 0x24, 0x3b, 0x4a, 0x27, 0x28, 0x4b, 0x39, 0x22, 0x73, 0x01, 0x01, 0x08, 0x2b, 0x29, - 0x10, 0x20, 0x19, 0x0f, 0x01, 0x0b, 0x35, 0x41, 0x46, 0x3b, 0x25, 0x23, 0x3a, 0x4b, 0x27, 0x2b, - 0x50, 0x3f, 0x26, 0x74, 0x05, 0x34, 0x33, 0x10, 0x20, 0x1a, 0x11, 0x2c, 0x42, 0x4d, 0x42, 0x2c, - 0x01, 0xef, 0x73, 0xf6, 0xfe, 0x97, 0x02, 0x70, 0x2a, 0x3f, 0x2a, 0x14, 0x18, 0x2e, 0x44, 0x2c, - 0x02, 0x03, 0x01, 0x27, 0x27, 0x07, 0x10, 0x1a, 0x12, 0x02, 0x0b, 0x02, 0x1f, 0x22, 0x19, 0x17, - 0x27, 0x3f, 0x34, 0x2c, 0x3e, 0x28, 0x13, 0x1a, 0x32, 0x48, 0x2e, 0x30, 0x30, 0x06, 0x0f, 0x1a, - 0x13, 0x21, 0x27, 0x1e, 0x1b, 0x29, 0x3e, 0x31, 0xfe, 0x4c, 0x02, 0x53, 0xfe, 0x10, 0x63, 0x00, - 0x00, 0x03, 0x00, 0x17, 0x00, 0xb3, 0x03, 0xce, 0x03, 0x17, 0x00, 0x38, 0x00, 0x4f, 0x00, 0x5d, - 0x00, 0x00, 0x13, 0x34, 0x3e, 0x02, 0x33, 0x32, 0x1e, 0x02, 0x15, 0x23, 0x27, 0x34, 0x23, 0x2e, - 0x01, 0x23, 0x22, 0x0e, 0x02, 0x15, 0x14, 0x1e, 0x04, 0x15, 0x14, 0x0e, 0x02, 0x23, 0x22, 0x2e, - 0x02, 0x35, 0x33, 0x1e, 0x01, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x26, 0x27, 0x2e, 0x03, 0x35, - 0x01, 0x11, 0x33, 0x32, 0x1e, 0x02, 0x15, 0x14, 0x06, 0x07, 0x30, 0x0e, 0x02, 0x31, 0x17, 0x15, - 0x23, 0x27, 0x23, 0x15, 0x23, 0x13, 0x33, 0x32, 0x3e, 0x02, 0x35, 0x34, 0x2e, 0x02, 0x2b, 0x01, - 0x15, 0x2a, 0x24, 0x3a, 0x4a, 0x26, 0x29, 0x4b, 0x39, 0x23, 0x73, 0x01, 0x01, 0x08, 0x2a, 0x2a, - 0x10, 0x1f, 0x1a, 0x10, 0x2c, 0x42, 0x4d, 0x42, 0x2c, 0x23, 0x39, 0x4b, 0x27, 0x2b, 0x51, 0x3f, - 0x27, 0x75, 0x05, 0x34, 0x33, 0x10, 0x20, 
0x1a, 0x10, 0x1f, 0x1c, 0x25, 0x53, 0x47, 0x2e, 0x01, - 0xed, 0xf3, 0x27, 0x41, 0x2d, 0x19, 0x1c, 0x20, 0x0c, 0x0e, 0x0c, 0x78, 0x7d, 0x68, 0x5d, 0x75, - 0x76, 0x71, 0x11, 0x1a, 0x12, 0x09, 0x0a, 0x14, 0x1d, 0x13, 0x69, 0x02, 0x71, 0x2a, 0x3e, 0x2a, - 0x14, 0x18, 0x2e, 0x44, 0x2c, 0x02, 0x02, 0x27, 0x29, 0x07, 0x11, 0x1a, 0x12, 0x1d, 0x24, 0x1c, - 0x1d, 0x2b, 0x40, 0x32, 0x2c, 0x3f, 0x29, 0x13, 0x1a, 0x31, 0x49, 0x2e, 0x30, 0x30, 0x06, 0x0f, - 0x19, 0x13, 0x1e, 0x22, 0x0b, 0x0e, 0x20, 0x2f, 0x43, 0x30, 0xfe, 0x4b, 0x02, 0x52, 0x1d, 0x32, - 0x42, 0x25, 0x2c, 0x42, 0x1d, 0x08, 0x0a, 0x08, 0xf1, 0x06, 0xd7, 0xd7, 0x01, 0x3f, 0x11, 0x19, - 0x1f, 0x0e, 0x11, 0x20, 0x19, 0x0f, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x12, 0x00, 0x12, - 0x00, 0x12, 0x00, 0x32, 0x00, 0x72, 0x00, 0x8e, 0x00, 0xac, 0x00, 0xbe, 0x00, 0xf0, 0x01, 0x14, - 0x01, 0x5c, 0x01, 0xb6, 0x02, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0xa2, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x02, 0x00, 0x07, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x2f, - 0x00, 0x17, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x12, 0x00, 0x46, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x58, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x06, 0x00, 0x12, 0x00, 0x65, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x01, 0x00, 0x20, - 0x00, 0x77, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x02, 0x00, 0x0e, 0x00, 0x97, 0x00, 0x03, - 0x00, 0x01, 0x04, 0x09, 0x00, 0x03, 0x00, 0x5e, 0x00, 0xa5, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, - 0x00, 0x04, 0x00, 0x24, 0x01, 0x03, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x05, 0x00, 0x1a, - 0x01, 0x27, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x06, 0x00, 0x24, 0x01, 0x41, 0x00, 0x03, - 0x00, 0x01, 0x04, 0x09, 0x00, 0x11, 0x00, 0x02, 0x01, 0x65, 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53, - 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x67, 
0x75, 0x6c, 0x61, - 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x30, 0x30, 0x3b, 0x3b, - 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, - 0x2d, 0x52, 0x3b, 0x32, 0x30, 0x31, 0x39, 0x3b, 0x46, 0x4c, 0x56, 0x49, 0x2d, 0x36, 0x31, 0x34, - 0x59, 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, - 0x20, 0x52, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x30, 0x30, 0x59, - 0x75, 0x7a, 0x75, 0x4f, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x2d, - 0x52, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, - 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, - 0x6e, 0x00, 0x52, 0x00, 0x65, 0x00, 0x67, 0x00, 0x75, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x72, 0x00, - 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x20, 0x00, - 0x31, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00, 0x3b, 0x00, 0x3b, 0x00, 0x59, 0x00, - 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, - 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x2d, 0x00, - 0x52, 0x00, 0x3b, 0x00, 0x32, 0x00, 0x30, 0x00, 0x31, 0x00, 0x39, 0x00, 0x3b, 0x00, 0x46, 0x00, - 0x4c, 0x00, 0x56, 0x00, 0x49, 0x00, 0x2d, 0x00, 0x36, 0x00, 0x31, 0x00, 0x34, 0x00, 0x59, 0x00, - 0x75, 0x00, 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, - 0x74, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x20, 0x00, - 0x52, 0x00, 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, - 0x20, 0x00, 0x31, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00, 0x59, 0x00, 0x75, 0x00, - 0x7a, 0x00, 0x75, 0x00, 0x4f, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, - 0x65, 0x00, 0x6e, 
0x00, 0x73, 0x00, 0x69, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x2d, 0x00, 0x52, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x9c, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x01, 0x02, 0x01, 0x03, 0x00, 0x03, 0x01, 0x04, - 0x01, 0x05, 0x01, 0x06, 0x01, 0x07, 0x01, 0x08, 0x01, 0x09, 0x01, 0x0a, 0x01, 0x0b, 0x01, 0x0c, - 0x01, 0x0d, 0x07, 0x75, 0x6e, 0x69, 0x30, 0x30, 0x30, 0x30, 0x07, 0x75, 0x6e, 0x69, 0x30, 0x30, - 0x30, 0x44, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x30, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, - 0x45, 0x31, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x32, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, - 0x45, 0x33, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x34, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, - 0x45, 0x35, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x36, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, - 0x45, 0x37, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, 0x45, 0x38, 0x07, 0x75, 0x6e, 0x69, 0x45, 0x30, - 0x45, 0x39, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xff, 0xff, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB8, 0x00, 0x00, 0x2C, 0x4B, 0xB8, 0x00, 0x09, 0x50, 0x58, 0xB1, 0x01, 0x01, 0x8E, 0x59, 0xB8, + 0x01, 0xFF, 0x85, 0xB8, 0x00, 0x44, 0x1D, 0xB9, 0x00, 0x09, 0x00, 0x03, 0x5F, 0x5E, 0x2D, 0xB8, + 0x00, 0x01, 0x2C, 0x20, 0x20, 0x45, 0x69, 0x44, 0xB0, 0x01, 0x60, 0x2D, 0xB8, 0x00, 0x02, 0x2C, + 0xB8, 0x00, 0x01, 0x2A, 0x21, 0x2D, 0xB8, 0x00, 0x03, 0x2C, 0x20, 0x46, 0xB0, 0x03, 0x25, 0x46, + 0x52, 0x58, 0x23, 0x59, 0x20, 0x8A, 0x20, 0x8A, 0x49, 0x64, 0x8A, 0x20, 0x46, 0x20, 0x68, 0x61, + 0x64, 0xB0, 0x04, 0x25, 0x46, 0x20, 0x68, 0x61, 0x64, 0x52, 0x58, 0x23, 0x65, 0x8A, 0x59, 0x2F, + 0x20, 0xB0, 0x00, 0x53, 0x58, 0x69, 0x20, 0xB0, 0x00, 0x54, 0x58, 0x21, 0xB0, 0x40, 0x59, 0x1B, + 0x69, 0x20, 0xB0, 0x00, 0x54, 0x58, 0x21, 0xB0, 0x40, 0x65, 0x59, 0x59, 0x3A, 0x2D, 0xB8, 0x00, + 0x04, 0x2C, 0x20, 0x46, 0xB0, 0x04, 0x25, 0x46, 0x52, 0x58, 0x23, 0x8A, 0x59, 0x20, 0x46, 0x20, + 0x6A, 0x61, 0x64, 0xB0, 0x04, 0x25, 0x46, 0x20, 0x6A, 0x61, 0x64, 0x52, 0x58, 0x23, 0x8A, 0x59, + 0x2F, 0xFD, 0x2D, 0xB8, 0x00, 0x05, 0x2C, 0x4B, 0x20, 0xB0, 0x03, 0x26, 0x50, 0x58, 0x51, 0x58, + 0xB0, 0x80, 0x44, 0x1B, 0xB0, 0x40, 0x44, 0x59, 0x1B, 0x21, 0x21, 0x20, 0x45, 0xB0, 0xC0, 0x50, + 0x58, 0xB0, 0xC0, 0x44, 0x1B, 0x21, 0x59, 0x59, 0x2D, 0xB8, 0x00, 0x06, 0x2C, 0x20, 0x20, 0x45, + 0x69, 0x44, 0xB0, 0x01, 0x60, 0x20, 0x20, 0x45, 0x7D, 0x69, 0x18, 0x44, 0xB0, 0x01, 0x60, 0x2D, + 0xB8, 0x00, 0x07, 0x2C, 0xB8, 0x00, 0x06, 0x2A, 0x2D, 0xB8, 0x00, 0x08, 0x2C, 0x4B, 0x20, 0xB0, + 0x03, 0x26, 0x53, 0x58, 0xB0, 0x40, 0x1B, 0xB0, 0x00, 0x59, 0x8A, 0x8A, 0x20, 0xB0, 0x03, 0x26, + 0x53, 0x58, 0x23, 0x21, 0xB0, 0x80, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, 0xB0, 0x03, 0x26, + 0x53, 0x58, 0x23, 0x21, 0xB8, 0x00, 0xC0, 0x8A, 0x8A, 0x1B, 0x8A, 
0x23, 0x59, 0x20, 0xB0, 0x03, + 0x26, 0x53, 0x58, 0x23, 0x21, 0xB8, 0x01, 0x00, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, 0xB0, + 0x03, 0x26, 0x53, 0x58, 0x23, 0x21, 0xB8, 0x01, 0x40, 0x8A, 0x8A, 0x1B, 0x8A, 0x23, 0x59, 0x20, + 0xB8, 0x00, 0x03, 0x26, 0x53, 0x58, 0xB0, 0x03, 0x25, 0x45, 0xB8, 0x01, 0x80, 0x50, 0x58, 0x23, + 0x21, 0xB8, 0x01, 0x80, 0x23, 0x21, 0x1B, 0xB0, 0x03, 0x25, 0x45, 0x23, 0x21, 0x23, 0x21, 0x59, + 0x1B, 0x21, 0x59, 0x44, 0x2D, 0xB8, 0x00, 0x09, 0x2C, 0x4B, 0x53, 0x58, 0x45, 0x44, 0x1B, 0x21, + 0x21, 0x59, 0x2D, 0x00, 0xB8, 0x00, 0x00, 0x2B, 0x00, 0xBA, 0x00, 0x01, 0x00, 0x01, 0x00, 0x07, + 0x2B, 0xB8, 0x00, 0x00, 0x20, 0x45, 0x7D, 0x69, 0x18, 0x44, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, 0x16, 0x00, 0x16, 0x00, 0x16, 0x00, 0x70, + 0x00, 0xDC, 0x01, 0x34, 0x01, 0x7C, 0x01, 0xA2, 0x01, 0xF4, 0x02, 0x3C, 0x02, 0xA8, 0x03, 0x4C, + 0x03, 0xE2, 0x04, 0x20, 0x04, 0x58, 0x04, 0x9A, 0x04, 0xEE, 0x05, 0x32, 0x05, 0x64, 0x05, 0x80, + 0x05, 0xC6, 0x05, 0xF6, 0x06, 0x54, 0x06, 0xB2, 0x07, 0x38, 0x07, 0x60, 0x07, 0x82, 0x00, 0x00, + 0x00, 0x02, 0x00, 0xA4, 0xFF, 0xFF, 0x03, 0x5C, 0x03, 0x09, 0x00, 0x03, 0x00, 0x07, 0x00, 0x00, + 0x13, 0x11, 0x21, 0x11, 0x25, 0x21, 0x11, 0x21, 0xCD, 0x02, 0x66, 0xFD, 0x71, 0x02, 0xB8, 0xFD, + 0x48, 0x02, 0xE0, 0xFD, 0x48, 0x02, 0xB8, 0x29, 0xFC, 0xF6, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, + 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1F, 0x00, 0x2F, 0x00, 0x39, 0x00, 0x00, + 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24, 0x32, 0x1E, + 0x02, 0x14, 0x0E, 0x02, 0x22, 0x2E, 0x02, 0x34, 0x3E, 0x01, 0x13, 0x12, 0x37, 0x33, 0x13, 0x12, + 0x15, 0x16, 0x23, 0x2F, 0x01, 0x23, 0x07, 0x23, 0x22, 0x26, 0x25, 0x30, 0x27, 0x26, 0x2F, 0x01, + 0x06, 0x07, 0x06, 0x32, 0x02, 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, + 0x46, 0x46, 0x77, 0xFE, 0x9E, 0xC8, 0xB7, 0x83, 0x4E, 0x4E, 0x83, 0xB7, 0xC8, 0xB7, 0x83, 0x4E, + 0x4E, 
0x83, 0x23, 0x6C, 0x5E, 0x6D, 0x68, 0x68, 0x01, 0x39, 0x38, 0x2E, 0xD1, 0x2B, 0x37, 0x33, + 0x04, 0x01, 0x48, 0x1D, 0x1C, 0x0A, 0x05, 0x01, 0x45, 0x01, 0x89, 0x03, 0x3F, 0x46, 0x77, 0xA4, + 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x4E, 0x83, 0xB7, 0xC8, 0xB7, + 0x83, 0x4E, 0x4E, 0x83, 0xB7, 0xC8, 0xB7, 0x83, 0xFD, 0x64, 0x01, 0x1A, 0xEB, 0xFE, 0xFE, 0xFE, + 0xFD, 0x03, 0x01, 0x01, 0x77, 0x78, 0x01, 0xCF, 0x4C, 0x4C, 0x1C, 0x0C, 0x02, 0xBE, 0x02, 0x00, + 0x00, 0x05, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x2F, + 0x00, 0x3A, 0x00, 0x44, 0x00, 0x00, 0x12, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x02, + 0x22, 0x0E, 0x01, 0x02, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x26, 0x01, + 0x16, 0x17, 0x14, 0x06, 0x07, 0x06, 0x2B, 0x01, 0x19, 0x01, 0x17, 0x32, 0x17, 0x16, 0x17, 0x16, + 0x07, 0x06, 0x0F, 0x01, 0x36, 0x37, 0x34, 0x2E, 0x01, 0x27, 0x23, 0x15, 0x33, 0x32, 0x27, 0x32, + 0x37, 0x36, 0x26, 0x27, 0x26, 0x2B, 0x01, 0x15, 0x45, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, + 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, + 0xF4, 0xE2, 0x01, 0xF7, 0x61, 0x01, 0x4E, 0x3E, 0x29, 0xAF, 0x4E, 0x81, 0x8B, 0x1D, 0x3C, 0x1F, + 0x19, 0x04, 0x06, 0x39, 0x57, 0x44, 0x01, 0x1B, 0x2D, 0x51, 0x46, 0x46, 0x47, 0x66, 0x70, 0x16, + 0x1F, 0x01, 0x2C, 0x08, 0x4B, 0x4C, 0x01, 0xDE, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, + 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, + 0x84, 0x84, 0x01, 0x6D, 0x21, 0x5B, 0x40, 0x50, 0x05, 0x03, 0x01, 0x03, 0x01, 0x05, 0x01, 0x05, + 0x09, 0x30, 0x25, 0x29, 0x40, 0x21, 0xC2, 0x06, 0x3E, 0x1A, 0x21, 0x0B, 0x01, 0x8C, 0xE1, 0x0A, + 0x0E, 0x54, 0x0B, 0x02, 0x79, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, + 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x38, 0x00, 0x00, 0x12, 0x14, 0x1E, 0x02, 0x32, 0x3E, + 0x02, 0x34, 0x2E, 0x02, 0x22, 0x0E, 0x01, 
0x02, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10, 0x0E, + 0x01, 0x20, 0x26, 0x36, 0x34, 0x3F, 0x01, 0x27, 0x26, 0x27, 0x33, 0x17, 0x16, 0x33, 0x36, 0x3F, + 0x02, 0x32, 0x14, 0x06, 0x16, 0x12, 0x14, 0x2B, 0x01, 0x27, 0x26, 0x06, 0x0F, 0x01, 0x23, 0x45, + 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, + 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x7B, 0x58, 0x58, 0x4D, 0x4F, 0x05, 0x7A, + 0x34, 0x34, 0x02, 0x01, 0x33, 0x32, 0x3C, 0x3C, 0xA1, 0x01, 0xB0, 0x3E, 0x3F, 0x39, 0x3B, 0x02, + 0x3A, 0x38, 0x3F, 0x01, 0xDE, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, + 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x60, + 0x02, 0x87, 0x88, 0x79, 0x7A, 0x06, 0x54, 0x54, 0x01, 0x53, 0x53, 0x01, 0x01, 0xFB, 0x04, 0xFE, + 0xF8, 0x02, 0x5B, 0x5A, 0x03, 0x59, 0x59, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, + 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, + 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, + 0x01, 0x10, 0x36, 0x01, 0x35, 0x27, 0x26, 0x34, 0x3B, 0x01, 0x17, 0x16, 0x36, 0x3F, 0x01, 0x33, + 0x03, 0x15, 0x23, 0x02, 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, + 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01, + 0x36, 0x5E, 0x5F, 0x3C, 0x3D, 0x3D, 0x3D, 0x03, 0x3B, 0x3B, 0x77, 0xBE, 0x68, 0x03, 0x3F, 0x46, + 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, 0xFE, + 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0xF9, 0x6E, 0x96, 0x95, 0x01, 0x67, 0x67, + 0x03, 0x66, 0x65, 0xFE, 0xD3, 0xDA, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0xFF, 0xBD, 0x03, 0xEC, + 0x03, 0x4B, 0x00, 0x06, 0x00, 0x0C, 0x00, 0x12, 0x00, 0x00, 0x01, 0x21, 0x22, 0x15, 0x30, 0x11, + 0x21, 0x17, 0x21, 0x11, 0x10, 0x25, 0x21, 0x01, 0x11, 0x33, 0x11, 0x21, 0x15, 
0x03, 0xBB, 0xFD, + 0x77, 0xED, 0x03, 0x76, 0x31, 0xFC, 0x28, 0x01, 0x1E, 0x02, 0xBA, 0xFD, 0x5C, 0x68, 0x01, 0x08, + 0x03, 0x1A, 0xEE, 0xFD, 0xC2, 0x31, 0x02, 0x6F, 0x01, 0x1E, 0x01, 0xFD, 0x36, 0x02, 0x07, 0xFE, + 0x50, 0x57, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0xBD, 0x03, 0xEC, 0x03, 0x4B, 0x00, 0x06, + 0x00, 0x0C, 0x00, 0x27, 0x00, 0x32, 0x00, 0x00, 0x05, 0x11, 0x34, 0x27, 0x30, 0x21, 0x11, 0x07, + 0x11, 0x21, 0x20, 0x19, 0x01, 0x25, 0x11, 0x33, 0x32, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, 0x07, + 0x06, 0x07, 0x06, 0x07, 0x1E, 0x02, 0x15, 0x07, 0x23, 0x27, 0x2E, 0x01, 0x2F, 0x01, 0x15, 0x13, + 0x36, 0x35, 0x34, 0x27, 0x26, 0x27, 0x23, 0x15, 0x33, 0x36, 0x03, 0xBB, 0xED, 0xFD, 0x77, 0x31, + 0x02, 0xBA, 0x01, 0x1E, 0xFD, 0x2A, 0x77, 0x76, 0x15, 0x49, 0x20, 0x35, 0x08, 0x04, 0x06, 0x13, + 0x66, 0x0C, 0x01, 0x1F, 0x2E, 0x65, 0x3D, 0x3D, 0x2A, 0x56, 0x28, 0x2E, 0x19, 0x99, 0x3C, 0x20, + 0x10, 0x56, 0x4F, 0x46, 0x47, 0x12, 0x02, 0x3E, 0xED, 0x01, 0xFC, 0xD4, 0x31, 0x03, 0x8E, 0xFE, + 0xE1, 0xFD, 0x91, 0xC4, 0x02, 0x07, 0x01, 0x04, 0x13, 0x21, 0x44, 0x1D, 0x19, 0x58, 0x15, 0x02, + 0x01, 0x13, 0x2D, 0xA2, 0x01, 0x01, 0x3D, 0x81, 0x1A, 0x01, 0x01, 0xDA, 0x01, 0x2D, 0x08, 0x3A, + 0x29, 0x0F, 0x08, 0x01, 0x85, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0xF5, 0x03, 0xEC, + 0x03, 0x13, 0x00, 0x09, 0x00, 0x11, 0x00, 0x26, 0x00, 0x32, 0x00, 0x00, 0x37, 0x21, 0x34, 0x10, + 0x35, 0x34, 0x27, 0x21, 0x04, 0x11, 0x23, 0x10, 0x25, 0x21, 0x16, 0x15, 0x11, 0x21, 0x37, 0x35, + 0x37, 0x36, 0x22, 0x2B, 0x01, 0x3D, 0x01, 0x3B, 0x01, 0x1D, 0x01, 0x0F, 0x01, 0x3B, 0x01, 0x1D, + 0x01, 0x2B, 0x01, 0x25, 0x35, 0x3B, 0x01, 0x1D, 0x01, 0x3B, 0x01, 0x1D, 0x01, 0x2B, 0x01, 0x45, + 0x03, 0x76, 0x45, 0xFE, 0x2D, 0xFE, 0xA2, 0x31, 0x01, 0x8F, 0x01, 0xD3, 0x76, 0xFC, 0x28, 0xA7, + 0x68, 0x68, 0x01, 0x5B, 0x5D, 0x90, 0x91, 0x6C, 0x6D, 0x71, 0x70, 0xA0, 0xA0, 0x01, 0x75, 0x27, + 0x28, 0x63, 0x63, 0x8B, 0x8A, 0x27, 0x69, 0x01, 0xA4, 0x69, 0x44, 0x01, 0x02, 0xFE, 0xA4, 0x01, + 0x8C, 0x03, 0x01, 
0x75, 0xFD, 0x58, 0xBB, 0x24, 0x80, 0x80, 0x21, 0x21, 0x1F, 0x1E, 0x85, 0x86, + 0x20, 0x22, 0xC3, 0xC3, 0xA1, 0xA3, 0x20, 0x22, 0x00, 0x05, 0x00, 0x14, 0xFF, 0xF5, 0x03, 0xEC, + 0x03, 0x13, 0x00, 0x08, 0x00, 0x10, 0x00, 0x2B, 0x00, 0x37, 0x00, 0x44, 0x00, 0x00, 0x37, 0x21, + 0x11, 0x10, 0x25, 0x30, 0x21, 0x06, 0x15, 0x03, 0x11, 0x34, 0x37, 0x21, 0x04, 0x19, 0x01, 0x01, + 0x35, 0x17, 0x32, 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07, 0x06, 0x17, 0x16, 0x17, 0x16, 0x17, + 0x16, 0x23, 0x2F, 0x01, 0x2E, 0x01, 0x2F, 0x01, 0x15, 0x23, 0x37, 0x32, 0x36, 0x37, 0x36, 0x35, + 0x26, 0x27, 0x26, 0x2B, 0x01, 0x15, 0x05, 0x35, 0x37, 0x36, 0x26, 0x2B, 0x01, 0x35, 0x21, 0x15, + 0x03, 0x17, 0x15, 0x45, 0x03, 0x76, 0xFE, 0xA2, 0xFE, 0x2D, 0x45, 0x31, 0x76, 0x01, 0xD3, 0x01, + 0x8F, 0xFE, 0x1E, 0x65, 0x6F, 0x15, 0x46, 0x10, 0x05, 0x04, 0x0D, 0x4F, 0x09, 0x09, 0x1F, 0x1D, + 0x3A, 0x06, 0x01, 0x30, 0x2F, 0x22, 0x37, 0x1E, 0x29, 0x14, 0x4E, 0x82, 0x34, 0x19, 0x0E, 0x13, + 0x0A, 0x22, 0x07, 0x38, 0x37, 0xFE, 0x3E, 0x68, 0x68, 0x01, 0x5C, 0x5C, 0x01, 0x20, 0xD8, 0xE1, + 0x27, 0x01, 0x5D, 0x01, 0x5B, 0x03, 0x01, 0x44, 0xFD, 0x58, 0x02, 0xA8, 0x75, 0x01, 0x03, 0xFE, + 0x74, 0xFE, 0x71, 0x01, 0x5C, 0xC5, 0x01, 0x04, 0x0C, 0x43, 0x15, 0x1D, 0x44, 0x10, 0x04, 0x06, + 0x14, 0x2B, 0x56, 0x10, 0x01, 0x01, 0x34, 0x52, 0x1C, 0x01, 0x01, 0xA5, 0xE3, 0x04, 0x06, 0x0A, + 0x20, 0x2C, 0x04, 0x01, 0x65, 0xE3, 0x47, 0x80, 0x80, 0x01, 0x42, 0x3D, 0xFE, 0xF5, 0x01, 0x41, + 0x00, 0x04, 0x00, 0x14, 0x00, 0x52, 0x03, 0xEC, 0x02, 0xB6, 0x00, 0x08, 0x00, 0x16, 0x00, 0x64, + 0x00, 0x70, 0x00, 0x00, 0x25, 0x11, 0x21, 0x22, 0x15, 0x30, 0x15, 0x14, 0x33, 0x11, 0x21, 0x32, + 0x15, 0x11, 0x14, 0x27, 0x21, 0x22, 0x26, 0x3D, 0x01, 0x34, 0x36, 0x13, 0x26, 0x27, 0x26, 0x27, + 0x26, 0x37, 0x33, 0x36, 0x37, 0x36, 0x33, 0x16, 0x17, 0x16, 0x17, 0x16, 0x37, 0x36, 0x37, 0x36, + 0x35, 0x34, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, 0x34, 0x37, 0x36, 0x37, + 0x36, 0x37, 0x36, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, 
0x17, 0x16, 0x0F, 0x01, 0x22, 0x06, 0x23, + 0x27, 0x26, 0x27, 0x26, 0x23, 0x22, 0x07, 0x06, 0x07, 0x06, 0x17, 0x16, 0x17, 0x16, 0x17, 0x16, + 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07, 0x06, 0x27, 0x37, 0x35, 0x3B, 0x01, 0x1D, 0x01, 0x3B, + 0x01, 0x1D, 0x01, 0x2B, 0x01, 0x03, 0xBB, 0xFD, 0x2A, 0xA0, 0xA0, 0x02, 0xEE, 0x19, 0x19, 0xFD, + 0x12, 0x57, 0x7A, 0x7A, 0xCA, 0x38, 0x1D, 0x16, 0x08, 0x03, 0x01, 0x02, 0x0F, 0x0C, 0x1E, 0x01, + 0x02, 0x04, 0x0C, 0x2B, 0x0F, 0x0E, 0x18, 0x0C, 0x09, 0x04, 0x15, 0x32, 0x23, 0x12, 0x1C, 0x0E, + 0x09, 0x03, 0x01, 0x01, 0x09, 0x21, 0x0F, 0x14, 0x2E, 0x2A, 0x13, 0x0F, 0x0C, 0x08, 0x0B, 0x05, + 0x02, 0x01, 0x02, 0x03, 0x36, 0x03, 0x02, 0x03, 0x08, 0x0D, 0x23, 0x16, 0x0E, 0x10, 0x01, 0x01, + 0x07, 0x0B, 0x32, 0x25, 0x13, 0x26, 0x0F, 0x09, 0x01, 0x01, 0x0F, 0x11, 0x24, 0x21, 0x2A, 0xE3, + 0x20, 0x20, 0x52, 0x50, 0x71, 0x71, 0x84, 0x02, 0x00, 0xAF, 0xA2, 0xAF, 0x02, 0x32, 0x19, 0xFD, + 0xCE, 0x19, 0x01, 0x84, 0x5C, 0xA2, 0x5C, 0x85, 0xFE, 0x29, 0x04, 0x1E, 0x18, 0x26, 0x0F, 0x01, + 0x02, 0x01, 0x03, 0x05, 0x0B, 0x29, 0x06, 0x02, 0x03, 0x04, 0x11, 0x0B, 0x0D, 0x0A, 0x06, 0x12, + 0x0D, 0x0A, 0x07, 0x0C, 0x18, 0x0D, 0x10, 0x06, 0x18, 0x05, 0x27, 0x14, 0x09, 0x03, 0x0A, 0x0D, + 0x06, 0x09, 0x09, 0x0D, 0x0F, 0x14, 0x0C, 0x06, 0x03, 0x02, 0x04, 0x10, 0x0A, 0x11, 0x08, 0x09, + 0x0E, 0x0C, 0x07, 0x0C, 0x0C, 0x0A, 0x07, 0x0F, 0x20, 0x11, 0x18, 0x1E, 0x1A, 0x1E, 0x0C, 0x0B, + 0x03, 0xAA, 0xA5, 0x89, 0x8A, 0x1C, 0x1B, 0x00, 0x00, 0x05, 0x00, 0x14, 0x00, 0x53, 0x03, 0xEC, + 0x02, 0xB6, 0x00, 0x08, 0x00, 0x16, 0x00, 0x2E, 0x00, 0x38, 0x00, 0x65, 0x00, 0x00, 0x01, 0x30, + 0x21, 0x11, 0x21, 0x32, 0x3D, 0x01, 0x34, 0x27, 0x32, 0x16, 0x1D, 0x01, 0x14, 0x06, 0x23, 0x21, + 0x26, 0x35, 0x11, 0x34, 0x33, 0x01, 0x11, 0x33, 0x32, 0x17, 0x16, 0x17, 0x16, 0x07, 0x06, 0x07, + 0x17, 0x1E, 0x01, 0x1F, 0x01, 0x23, 0x2A, 0x01, 0x2E, 0x01, 0x23, 0x27, 0x15, 0x37, 0x32, 0x37, + 0x36, 0x27, 0x2E, 0x01, 0x2B, 0x01, 0x15, 0x05, 0x26, 0x27, 0x37, 0x32, 0x3F, 0x01, 0x16, 
0x17, + 0x1E, 0x01, 0x37, 0x36, 0x27, 0x2E, 0x04, 0x37, 0x3E, 0x01, 0x33, 0x32, 0x17, 0x16, 0x17, 0x14, + 0x06, 0x27, 0x26, 0x27, 0x26, 0x0E, 0x01, 0x1E, 0x02, 0x17, 0x16, 0x06, 0x07, 0x06, 0x07, 0x06, + 0x03, 0x1B, 0xFD, 0x2A, 0x02, 0xD6, 0xA0, 0xA0, 0x57, 0x7A, 0x7A, 0x57, 0xFD, 0x12, 0x19, 0x19, + 0x01, 0xD3, 0x47, 0x44, 0x11, 0x3E, 0x18, 0x21, 0x0B, 0x0C, 0x43, 0x04, 0x17, 0x1C, 0x1E, 0x16, + 0x26, 0x26, 0x03, 0x4D, 0x18, 0x1E, 0x11, 0x25, 0x3A, 0x0C, 0x22, 0x08, 0x03, 0x1B, 0x3E, 0x29, + 0xFE, 0xAC, 0x0D, 0x04, 0x02, 0x02, 0x1E, 0x1D, 0x03, 0x02, 0x0C, 0x4C, 0x13, 0x20, 0x07, 0x04, + 0x1B, 0x56, 0x2D, 0x1C, 0x01, 0x02, 0x44, 0x35, 0x49, 0x1F, 0x10, 0x03, 0x41, 0x01, 0x06, 0x0A, + 0x16, 0x3C, 0x18, 0x0C, 0x16, 0x5D, 0x15, 0x33, 0x03, 0x2B, 0x1E, 0x34, 0x59, 0x02, 0x84, 0xFE, + 0x00, 0xAF, 0xA2, 0xAF, 0x32, 0x85, 0x5C, 0xA2, 0x5C, 0x84, 0x01, 0x17, 0x02, 0x32, 0x19, 0xFE, + 0x2F, 0x01, 0x45, 0x01, 0x02, 0x19, 0x22, 0x32, 0x39, 0x0B, 0x08, 0x0F, 0x27, 0x2F, 0x24, 0x75, + 0x12, 0x01, 0x88, 0xBB, 0x04, 0x09, 0x2A, 0x0F, 0x0D, 0x53, 0x8A, 0x17, 0x1E, 0x04, 0x03, 0x03, + 0x0C, 0x04, 0x26, 0x0E, 0x0C, 0x14, 0x1A, 0x0E, 0x0E, 0x16, 0x16, 0x2C, 0x1A, 0x2D, 0x2D, 0x2A, + 0x16, 0x1D, 0x06, 0x04, 0x01, 0x1A, 0x09, 0x11, 0x09, 0x17, 0x18, 0x0D, 0x17, 0x0C, 0x1B, 0x71, + 0x1B, 0x12, 0x01, 0x03, 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, + 0x00, 0x1B, 0x00, 0x27, 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, + 0x34, 0x2E, 0x01, 0x24, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, + 0x33, 0x35, 0x33, 0x15, 0x33, 0x15, 0x23, 0x15, 0x23, 0x35, 0x23, 0x02, 0x5A, 0xB4, 0xA4, 0x77, + 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C, 0x01, 0x0C, 0xE2, 0x84, + 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0xC5, 0x4E, 0xC5, 0xC4, 0x50, 0xC4, 0x03, 0x3F, + 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, + 0xFE, 0xF4, 0xE2, 0x84, 0x84, 
0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0xC0, 0xC4, 0xC5, 0x4E, 0xC5, 0xC5, + 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0F, 0x00, 0x1B, 0x00, 0x1F, + 0x00, 0x00, 0x00, 0x22, 0x0E, 0x02, 0x14, 0x1E, 0x02, 0x32, 0x3E, 0x02, 0x34, 0x2E, 0x01, 0x24, + 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x35, 0x21, 0x15, 0x02, + 0x5A, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xFE, 0x7C, + 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0x01, 0xD8, 0x03, 0x3F, + 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x46, 0x46, 0x77, 0xA4, 0xB4, 0xA4, 0x77, 0x77, 0x84, 0xE2, + 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0x71, 0x4E, 0x4E, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x1B, 0x00, 0x25, + 0x00, 0x00, 0x00, 0x20, 0x0E, 0x01, 0x10, 0x1E, 0x01, 0x20, 0x3E, 0x01, 0x10, 0x26, 0x01, 0x12, + 0x37, 0x33, 0x13, 0x12, 0x15, 0x16, 0x23, 0x2F, 0x01, 0x23, 0x07, 0x23, 0x22, 0x26, 0x25, 0x30, + 0x27, 0x26, 0x2F, 0x01, 0x06, 0x07, 0x06, 0x32, 0x02, 0x86, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, + 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xFD, 0xA0, 0x6C, 0x5E, 0x6D, 0x68, 0x68, 0x01, 0x39, 0x38, 0x2E, + 0xD1, 0x2B, 0x37, 0x33, 0x04, 0x01, 0x48, 0x1D, 0x1C, 0x0A, 0x05, 0x01, 0x45, 0x01, 0x89, 0x03, + 0x70, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0x9A, 0x01, 0x1A, + 0xEB, 0xFE, 0xFE, 0xFE, 0xFD, 0x03, 0x01, 0x01, 0x77, 0x78, 0x01, 0xCF, 0x4C, 0x4C, 0x1C, 0x0C, + 0x02, 0xBE, 0x02, 0x00, 0x00, 0x04, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, + 0x00, 0x20, 0x00, 0x2B, 0x00, 0x35, 0x00, 0x00, 0x36, 0x10, 0x3E, 0x01, 0x20, 0x1E, 0x01, 0x10, + 0x0E, 0x01, 0x20, 0x26, 0x01, 0x30, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x23, 0x27, + 0x19, 0x01, 0x33, 0x32, 0x37, 0x3E, 0x01, 0x35, 0x26, 0x07, 0x06, 0x2B, 0x01, 0x35, 0x33, 0x1E, + 0x02, 0x15, 0x06, 0x27, 0x23, 0x35, 0x33, 0x16, 0x17, 0x16, 0x14, 
0x07, 0x06, 0x14, 0x84, 0xE2, + 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x01, 0xF7, 0x0A, 0x3A, 0x05, 0x04, 0x19, + 0x20, 0x3B, 0x1D, 0x8B, 0x81, 0x4E, 0xAF, 0x29, 0x3E, 0x4E, 0x01, 0xAE, 0x0D, 0x47, 0x46, 0x46, + 0x52, 0x2C, 0x1B, 0x01, 0xB7, 0x27, 0x4C, 0x4C, 0x07, 0x2C, 0x1E, 0x16, 0xFE, 0x01, 0x0C, 0xE2, + 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01, 0x6D, 0x06, 0x21, 0x40, 0x2A, 0x24, 0x30, + 0x09, 0x05, 0x01, 0xFE, 0xFB, 0xFE, 0xFD, 0x03, 0x05, 0x4F, 0x41, 0x5B, 0x9B, 0x01, 0x8C, 0x01, + 0x0B, 0x21, 0x1A, 0x3E, 0xDA, 0x79, 0x01, 0x01, 0x0B, 0x54, 0x0E, 0x0A, 0x00, 0x02, 0x00, 0x14, + 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x29, 0x00, 0x00, 0x36, 0x10, 0x3E, 0x01, + 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x26, 0x36, 0x14, 0x3B, 0x01, 0x37, 0x36, 0x37, 0x36, + 0x1F, 0x01, 0x33, 0x32, 0x34, 0x02, 0x26, 0x36, 0x34, 0x23, 0x0F, 0x01, 0x06, 0x07, 0x22, 0x2F, + 0x01, 0x23, 0x16, 0x1F, 0x01, 0x07, 0x14, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, + 0xF4, 0xE2, 0x7B, 0x3D, 0x3F, 0x38, 0x3A, 0x01, 0x02, 0x3A, 0x39, 0x3F, 0x3E, 0xB0, 0x01, 0xA1, + 0x3C, 0x3C, 0x32, 0x33, 0x01, 0x02, 0x34, 0x34, 0x7A, 0x05, 0x4F, 0x4D, 0x58, 0xFE, 0x01, 0x0C, + 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x62, 0x02, 0x59, 0x59, 0x02, 0x01, 0x5A, + 0x5B, 0x02, 0x01, 0x08, 0x04, 0xFB, 0x01, 0x01, 0x53, 0x53, 0x01, 0x54, 0x54, 0x06, 0x7A, 0x79, + 0x88, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, + 0x00, 0x1B, 0x00, 0x00, 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, + 0x01, 0x15, 0x33, 0x35, 0x13, 0x23, 0x07, 0x0E, 0x01, 0x2F, 0x01, 0x23, 0x22, 0x16, 0x1F, 0x01, + 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x01, 0x36, 0x68, + 0xBE, 0x77, 0x3B, 0x3C, 0x02, 0x3D, 0x3D, 0x3D, 0x3D, 0x01, 0x5F, 0x5E, 0x03, 0x70, 0x84, 0xE2, + 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFD, 0xF9, 0x6D, 0xDA, 0x01, 0x2D, 0x65, + 0x66, 
0x03, 0x67, 0x67, 0x01, 0x95, 0x96, 0x00, 0x00, 0x02, 0x00, 0x14, 0xFF, 0xBF, 0x03, 0xEC, + 0x03, 0x4A, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x05, 0x21, 0x11, 0x10, 0x05, 0x21, 0x01, 0x21, + 0x35, 0x21, 0x11, 0x23, 0x03, 0xEC, 0xFC, 0x28, 0x01, 0x14, 0x02, 0xC4, 0xFD, 0x5C, 0x01, 0x70, + 0xFE, 0xF8, 0x68, 0x41, 0x02, 0x77, 0x01, 0x14, 0x01, 0xFD, 0x38, 0x57, 0x01, 0xB0, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x14, 0xFF, 0xBF, 0x03, 0xEC, 0x03, 0x49, 0x00, 0x05, 0x00, 0x20, 0x00, 0x2B, + 0x00, 0x00, 0x17, 0x11, 0x21, 0x20, 0x19, 0x01, 0x25, 0x33, 0x35, 0x17, 0x1E, 0x01, 0x1F, 0x01, + 0x33, 0x37, 0x2E, 0x02, 0x27, 0x34, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x27, 0x26, + 0x2B, 0x01, 0x05, 0x06, 0x2B, 0x01, 0x35, 0x33, 0x16, 0x17, 0x16, 0x15, 0x14, 0x14, 0x02, 0xC4, + 0x01, 0x14, 0xFD, 0x2A, 0x69, 0x19, 0x2E, 0x28, 0x56, 0x2A, 0x3D, 0x3D, 0x01, 0x65, 0x2C, 0x20, + 0x0D, 0x66, 0x13, 0x06, 0x04, 0x09, 0x34, 0x20, 0x49, 0x15, 0x76, 0x77, 0x01, 0x02, 0x0C, 0x47, + 0x46, 0x4F, 0x56, 0x10, 0x20, 0x41, 0x03, 0x8A, 0xFE, 0xED, 0xFD, 0x89, 0xC2, 0xDA, 0x01, 0x01, + 0x1A, 0x81, 0x3D, 0x01, 0x01, 0xA3, 0x2C, 0x13, 0x01, 0x02, 0x13, 0x5A, 0x1A, 0x1C, 0x44, 0x21, + 0x13, 0x04, 0x01, 0xDA, 0x02, 0x85, 0x01, 0x08, 0x0F, 0x29, 0x3A, 0x00, 0x00, 0x03, 0x00, 0x14, + 0xFF, 0xFB, 0x03, 0xEC, 0x03, 0x0E, 0x00, 0x08, 0x00, 0x15, 0x00, 0x1B, 0x00, 0x00, 0x05, 0x21, + 0x11, 0x10, 0x21, 0x30, 0x21, 0x32, 0x15, 0x01, 0x21, 0x35, 0x23, 0x13, 0x35, 0x21, 0x15, 0x33, + 0x32, 0x22, 0x0F, 0x01, 0x05, 0x21, 0x35, 0x23, 0x11, 0x23, 0x03, 0xEC, 0xFC, 0x28, 0x01, 0x8A, + 0x01, 0xEC, 0x62, 0xFC, 0xCF, 0x01, 0x40, 0xE1, 0xD9, 0xFE, 0xDF, 0x5D, 0x5C, 0x01, 0x67, 0x68, + 0x01, 0x75, 0x01, 0x15, 0xC6, 0x4F, 0x05, 0x01, 0x89, 0x01, 0x8A, 0x63, 0xFD, 0xE1, 0x42, 0x01, + 0x0B, 0x3D, 0x42, 0x80, 0x80, 0x48, 0x42, 0x01, 0x44, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x14, + 0xFF, 0xFB, 0x03, 0xEC, 0x03, 0x0E, 0x00, 0x07, 0x00, 0x22, 0x00, 0x2F, 0x00, 0x3C, 0x00, 0x00, + 0x17, 0x11, 0x34, 0x37, 0x21, 0x20, 0x19, 
0x01, 0x01, 0x15, 0x33, 0x35, 0x17, 0x1E, 0x01, 0x1F, + 0x02, 0x32, 0x35, 0x26, 0x27, 0x26, 0x27, 0x26, 0x37, 0x36, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, + 0x23, 0x27, 0x17, 0x30, 0x23, 0x35, 0x33, 0x32, 0x17, 0x16, 0x17, 0x14, 0x07, 0x0E, 0x01, 0x05, + 0x21, 0x35, 0x27, 0x13, 0x35, 0x21, 0x15, 0x33, 0x32, 0x14, 0x0F, 0x01, 0x14, 0x62, 0x01, 0xEC, + 0x01, 0x8A, 0xFE, 0x1E, 0x4E, 0x14, 0x29, 0x1E, 0x37, 0x22, 0x2F, 0x2F, 0x06, 0x3A, 0x1D, 0x1F, + 0x09, 0x09, 0x4E, 0x0E, 0x04, 0x05, 0x0F, 0x47, 0x15, 0x6F, 0x65, 0x82, 0x34, 0x37, 0x38, 0x07, + 0x23, 0x09, 0x13, 0x0D, 0x1A, 0xFD, 0xD6, 0x01, 0x40, 0xE1, 0xD8, 0xFE, 0xE0, 0x5C, 0x5C, 0x67, + 0x68, 0x05, 0x02, 0xB0, 0x62, 0x01, 0xFE, 0x76, 0xFE, 0x77, 0x01, 0x56, 0xC5, 0xA5, 0x01, 0x01, + 0x1C, 0x52, 0x34, 0x01, 0x01, 0x0E, 0x58, 0x2C, 0x13, 0x06, 0x04, 0x0F, 0x45, 0x1E, 0x14, 0x42, + 0x0D, 0x04, 0x01, 0xA7, 0x65, 0x01, 0x04, 0x2C, 0x21, 0x09, 0x07, 0x03, 0xE3, 0x41, 0x01, 0x01, + 0x0B, 0x3D, 0x42, 0x01, 0x80, 0x80, 0x00, 0x00, 0x00, 0x03, 0x00, 0x14, 0x00, 0x5D, 0x03, 0xEC, + 0x02, 0xAB, 0x00, 0x08, 0x00, 0x37, 0x00, 0x3D, 0x00, 0x00, 0x13, 0x30, 0x21, 0x11, 0x21, 0x22, + 0x3D, 0x01, 0x34, 0x05, 0x37, 0x34, 0x27, 0x26, 0x27, 0x26, 0x07, 0x06, 0x07, 0x0E, 0x01, 0x17, + 0x1E, 0x01, 0x17, 0x16, 0x14, 0x07, 0x06, 0x26, 0x27, 0x26, 0x27, 0x22, 0x06, 0x07, 0x22, 0x17, + 0x1E, 0x01, 0x17, 0x16, 0x37, 0x36, 0x27, 0x26, 0x27, 0x2E, 0x02, 0x37, 0x36, 0x33, 0x32, 0x1F, + 0x02, 0x33, 0x35, 0x23, 0x11, 0x23, 0xD6, 0x03, 0x16, 0xFC, 0xEA, 0xC2, 0x01, 0xC6, 0x02, 0x01, + 0x0C, 0x3A, 0x2B, 0x2D, 0x13, 0x10, 0x2B, 0x01, 0x33, 0x17, 0x55, 0x15, 0x04, 0x09, 0x14, 0x58, + 0x0C, 0x04, 0x02, 0x02, 0x26, 0x14, 0x01, 0x03, 0x08, 0x33, 0x38, 0x5F, 0x20, 0x10, 0x01, 0x03, + 0x3C, 0x12, 0x59, 0x11, 0x01, 0x02, 0x39, 0x2C, 0x09, 0x02, 0x9D, 0xE2, 0xA2, 0x40, 0x02, 0xAB, + 0xFD, 0xB2, 0xD2, 0xAA, 0xD2, 0xDC, 0x03, 0x07, 0x0B, 0x38, 0x10, 0x0C, 0x09, 0x04, 0x08, 0x19, + 0x6C, 0x17, 0x0B, 0x17, 0x11, 0x07, 0x17, 0x0A, 0x1A, 0x0A, 0x29, 0x0C, 0x04, 
0x04, 0x02, 0x10, + 0x25, 0x37, 0x04, 0x06, 0x37, 0x1D, 0x1C, 0x3F, 0x19, 0x08, 0x16, 0x13, 0x0B, 0x1F, 0x2B, 0x04, + 0xE9, 0x37, 0x01, 0x13, 0x00, 0x04, 0x00, 0x14, 0x00, 0x5D, 0x03, 0xEC, 0x02, 0xAB, 0x00, 0x07, + 0x00, 0x1F, 0x00, 0x2A, 0x00, 0x58, 0x00, 0x00, 0x01, 0x32, 0x1D, 0x01, 0x14, 0x23, 0x21, 0x11, + 0x01, 0x33, 0x35, 0x17, 0x1E, 0x03, 0x3B, 0x01, 0x27, 0x2E, 0x01, 0x2F, 0x01, 0x36, 0x37, 0x36, + 0x27, 0x26, 0x27, 0x26, 0x2B, 0x01, 0x17, 0x30, 0x23, 0x35, 0x33, 0x32, 0x16, 0x17, 0x16, 0x07, + 0x06, 0x05, 0x16, 0x37, 0x36, 0x37, 0x3E, 0x01, 0x27, 0x2E, 0x03, 0x3E, 0x01, 0x17, 0x16, 0x17, + 0x30, 0x37, 0x36, 0x27, 0x26, 0x27, 0x26, 0x27, 0x22, 0x06, 0x07, 0x06, 0x1E, 0x03, 0x17, 0x16, + 0x07, 0x06, 0x26, 0x27, 0x26, 0x27, 0x07, 0x06, 0x23, 0x07, 0x16, 0x03, 0x2A, 0xC2, 0xC2, 0xFC, + 0xEA, 0x01, 0xEC, 0x41, 0x11, 0x1F, 0x17, 0x4D, 0x02, 0x27, 0x26, 0x16, 0x1E, 0x1C, 0x17, 0x04, + 0x43, 0x0C, 0x0B, 0x21, 0x18, 0x3E, 0x0F, 0x46, 0x47, 0x66, 0x25, 0x29, 0x3E, 0x1B, 0x03, 0x08, + 0x22, 0x0C, 0xFE, 0x4D, 0x22, 0x59, 0x34, 0x1E, 0x2B, 0x03, 0x33, 0x16, 0x5C, 0x16, 0x0C, 0x18, + 0x3C, 0x16, 0x0B, 0x05, 0x22, 0x21, 0x01, 0x03, 0x10, 0x1F, 0x49, 0x36, 0x43, 0x02, 0x01, 0x1C, + 0x2D, 0x56, 0x1B, 0x04, 0x07, 0x20, 0x13, 0x4B, 0x0D, 0x01, 0x04, 0x1D, 0x1E, 0x02, 0x02, 0x04, + 0x02, 0xAB, 0xD2, 0xAA, 0xD2, 0x02, 0x4E, 0xFE, 0x39, 0x89, 0x01, 0x01, 0x11, 0x75, 0x01, 0x25, + 0x2F, 0x27, 0x0F, 0x08, 0x0C, 0x38, 0x33, 0x21, 0x19, 0x02, 0x01, 0x8A, 0x53, 0x0D, 0x0F, 0x2A, + 0x09, 0x04, 0x8A, 0x3A, 0x03, 0x01, 0x12, 0x1B, 0x71, 0x1B, 0x0C, 0x17, 0x0D, 0x18, 0x17, 0x09, + 0x11, 0x09, 0x1A, 0x01, 0x01, 0x07, 0x1E, 0x15, 0x29, 0x01, 0x2D, 0x2D, 0x1A, 0x2C, 0x16, 0x16, + 0x0D, 0x0F, 0x1A, 0x14, 0x0C, 0x0D, 0x27, 0x04, 0x0C, 0x03, 0x03, 0x04, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x17, 0x00, 0x00, + 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x15, 0x33, 0x15, + 0x33, 0x35, 0x33, 
0x35, 0x23, 0x35, 0x23, 0x15, 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, + 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0xC4, 0x50, 0xC4, 0xC5, 0x4E, 0x03, 0x70, 0x84, 0xE2, 0xFE, + 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, 0xC0, 0x4F, 0xC5, 0xC5, 0x4E, 0xC5, 0xC4, + 0x00, 0x02, 0x00, 0x14, 0xFF, 0x98, 0x03, 0xEC, 0x03, 0x70, 0x00, 0x0B, 0x00, 0x0F, 0x00, 0x00, + 0x00, 0x20, 0x1E, 0x01, 0x10, 0x0E, 0x01, 0x20, 0x2E, 0x01, 0x10, 0x36, 0x13, 0x21, 0x35, 0x21, + 0x01, 0x7A, 0x01, 0x0C, 0xE2, 0x84, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0x7C, 0x01, 0xD8, + 0xFE, 0x28, 0x03, 0x70, 0x84, 0xE2, 0xFE, 0xF4, 0xE2, 0x84, 0x84, 0xE2, 0x01, 0x0C, 0xE2, 0xFE, + 0x71, 0x4E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0xAE, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x15, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10, + 0x00, 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x07, 0x00, 0x85, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0xAF, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x10, 0x00, 0xE2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0D, + 0x01, 0x0F, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x01, 0x3F, 0x00, 0x03, + 0x00, 0x01, 0x04, 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, + 0x00, 0x01, 0x00, 0x20, 0x00, 0x42, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x02, 0x00, 0x0E, + 0x00, 0x75, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x03, 0x00, 0x20, 0x00, 0x8D, 0x00, 0x03, + 0x00, 0x01, 0x04, 0x09, 0x00, 0x04, 0x00, 0x20, 0x00, 0xC0, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, + 0x00, 0x05, 0x00, 0x1A, 0x00, 0xF3, 0x00, 0x03, 0x00, 0x01, 0x04, 0x09, 0x00, 0x06, 0x00, 0x20, + 0x01, 0x1D, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x20, 0x00, 0x45, 0x00, 0x6D, + 0x00, 0x75, 0x00, 0x6C, 0x00, 0x61, 0x00, 0x74, 0x00, 0x6F, 0x00, 0x72, 0x00, 0x20, 0x00, 0x50, + 0x00, 0x72, 0x00, 0x6F, 0x00, 0x6A, 0x00, 0x65, 0x00, 
0x63, 0x00, 0x74, 0x00, 0x00, 0x59, 0x75, + 0x7A, 0x75, 0x20, 0x45, 0x6D, 0x75, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x20, 0x50, 0x72, 0x6F, 0x6A, + 0x65, 0x63, 0x74, 0x00, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, + 0x00, 0x53, 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, + 0x00, 0x6F, 0x00, 0x6E, 0x00, 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, + 0x65, 0x6E, 0x73, 0x69, 0x6F, 0x6E, 0x00, 0x00, 0x52, 0x00, 0x65, 0x00, 0x67, 0x00, 0x75, 0x00, + 0x6C, 0x00, 0x61, 0x00, 0x72, 0x00, 0x00, 0x52, 0x65, 0x67, 0x75, 0x6C, 0x61, 0x72, 0x00, 0x00, + 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, + 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, 0x00, 0x6E, 0x00, + 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E, 0x73, 0x69, 0x6F, + 0x6E, 0x00, 0x00, 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53, + 0x00, 0x45, 0x00, 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, + 0x00, 0x6E, 0x00, 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E, + 0x73, 0x69, 0x6F, 0x6E, 0x00, 0x00, 0x56, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, + 0x6F, 0x00, 0x6E, 0x00, 0x20, 0x00, 0x31, 0x00, 0x2E, 0x00, 0x30, 0x00, 0x30, 0x00, 0x30, 0x00, + 0x00, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x30, 0x30, 0x30, 0x00, 0x00, + 0x59, 0x00, 0x75, 0x00, 0x7A, 0x00, 0x75, 0x00, 0x4F, 0x00, 0x53, 0x00, 0x53, 0x00, 0x45, 0x00, + 0x78, 0x00, 0x74, 0x00, 0x65, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, 0x00, 0x6E, 0x00, + 0x00, 0x59, 0x75, 0x7A, 0x75, 0x4F, 0x53, 0x53, 0x45, 0x78, 0x74, 0x65, 0x6E, 0x73, 0x69, 0x6F, + 0x6E, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xB5, 0x00, 0x32, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x01, 0x02, 0x01, 0x03, 0x00, 0x03, 0x01, 0x04, + 0x01, 0x05, 0x01, 0x06, 0x01, 0x07, 0x01, 0x08, 0x01, 0x09, 0x01, 0x0A, 0x01, 0x0B, 0x01, 0x0C, + 0x01, 0x0D, 0x01, 0x0E, 0x01, 0x0F, 0x01, 0x10, 0x01, 0x11, 0x01, 0x12, 0x01, 0x13, 0x01, 0x14, + 0x01, 0x15, 0x01, 0x16, 0x01, 0x17, 0x01, 0x18, 0x01, 0x19, 0x01, 0x1A, 0x01, 0x1B, 0x07, 0x75, + 0x6E, 0x69, 0x30, 0x30, 0x30, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x30, 0x30, 0x30, 0x44, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x41, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x31, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x41, 0x32, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x33, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x41, 0x34, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x35, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x41, 0x36, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x37, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x41, 0x38, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x41, 0x39, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x42, 0x33, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x42, 0x34, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x30, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x31, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x32, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x33, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x34, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x35, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x36, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x37, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x38, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x45, 0x39, 0x07, 0x75, + 0x6E, 0x69, 0x45, 0x30, 0x45, 0x46, 0x07, 0x75, 0x6E, 0x69, 0x45, 0x30, 0x46, 0x30, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, 0xFF, 0xFF, 0x00, 0x0F, }}; } // namespace FileSys::SystemArchive::SharedFontData diff --git a/src/core/file_sys/system_archive/data/font_nintendo_extended.h b/src/core/file_sys/system_archive/data/font_nintendo_extended.h index 2089f3db9..edb9df914 100644 --- 
a/src/core/file_sys/system_archive/data/font_nintendo_extended.h +++ b/src/core/file_sys/system_archive/data/font_nintendo_extended.h @@ -8,6 +8,6 @@ namespace FileSys::SystemArchive::SharedFontData { -extern const std::array<unsigned char, 2932> FONT_NINTENDO_EXTENDED; +extern const std::array<unsigned char, 6024> FONT_NINTENDO_EXTENDED; } // namespace FileSys::SystemArchive::SharedFontData diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp index aa313de66..7bfbc9a67 100644 --- a/src/core/file_sys/system_archive/system_version.cpp +++ b/src/core/file_sys/system_archive/system_version.cpp @@ -12,17 +12,17 @@ namespace SystemVersionData { // This section should reflect the best system version to describe yuzu's HLE api. // TODO(DarkLordZach): Update when HLE gets better. -constexpr u8 VERSION_MAJOR = 10; +constexpr u8 VERSION_MAJOR = 11; constexpr u8 VERSION_MINOR = 0; -constexpr u8 VERSION_MICRO = 2; +constexpr u8 VERSION_MICRO = 0; -constexpr u8 REVISION_MAJOR = 1; +constexpr u8 REVISION_MAJOR = 5; constexpr u8 REVISION_MINOR = 0; constexpr char PLATFORM_STRING[] = "NX"; -constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc"; -constexpr char DISPLAY_VERSION[] = "10.0.2"; -constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0"; +constexpr char VERSION_HASH[] = "34197eba8810e2edd5e9dfcfbde7b340882e856d"; +constexpr char DISPLAY_VERSION[] = "11.0.0"; +constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 11.0.0-5.0"; } // namespace SystemVersionData diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp index b2f026b6d..f497e9396 100644 --- a/src/core/file_sys/vfs.cpp +++ b/src/core/file_sys/vfs.cpp @@ -203,7 +203,7 @@ std::string VfsFile::GetFullPath() const { return GetContainingDirectory()->GetFullPath() + "/" + GetName(); } -std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) const { +VirtualFile 
VfsDirectory::GetFileRelative(std::string_view path) const { auto vec = Common::FS::SplitPathComponents(path); vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), vec.end()); @@ -231,7 +231,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) co return dir->GetFile(vec.back()); } -std::shared_ptr<VfsFile> VfsDirectory::GetFileAbsolute(std::string_view path) const { +VirtualFile VfsDirectory::GetFileAbsolute(std::string_view path) const { if (IsRoot()) { return GetFileRelative(path); } @@ -239,7 +239,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileAbsolute(std::string_view path) co return GetParentDirectory()->GetFileAbsolute(path); } -std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_view path) const { +VirtualDir VfsDirectory::GetDirectoryRelative(std::string_view path) const { auto vec = Common::FS::SplitPathComponents(path); vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), vec.end()); @@ -261,7 +261,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_vie return dir; } -std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryAbsolute(std::string_view path) const { +VirtualDir VfsDirectory::GetDirectoryAbsolute(std::string_view path) const { if (IsRoot()) { return GetDirectoryRelative(path); } @@ -269,14 +269,14 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryAbsolute(std::string_vie return GetParentDirectory()->GetDirectoryAbsolute(path); } -std::shared_ptr<VfsFile> VfsDirectory::GetFile(std::string_view name) const { +VirtualFile VfsDirectory::GetFile(std::string_view name) const { const auto& files = GetFiles(); const auto iter = std::find_if(files.begin(), files.end(), [&name](const auto& file1) { return name == file1->GetName(); }); return iter == files.end() ? 
nullptr : *iter; } -std::shared_ptr<VfsDirectory> VfsDirectory::GetSubdirectory(std::string_view name) const { +VirtualDir VfsDirectory::GetSubdirectory(std::string_view name) const { const auto& subs = GetSubdirectories(); const auto iter = std::find_if(subs.begin(), subs.end(), [&name](const auto& file1) { return name == file1->GetName(); }); @@ -301,7 +301,7 @@ std::size_t VfsDirectory::GetSize() const { return file_total + subdir_total; } -std::shared_ptr<VfsFile> VfsDirectory::CreateFileRelative(std::string_view path) { +VirtualFile VfsDirectory::CreateFileRelative(std::string_view path) { auto vec = Common::FS::SplitPathComponents(path); vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), vec.end()); @@ -324,7 +324,7 @@ std::shared_ptr<VfsFile> VfsDirectory::CreateFileRelative(std::string_view path) return dir->CreateFileRelative(Common::FS::GetPathWithoutTop(path)); } -std::shared_ptr<VfsFile> VfsDirectory::CreateFileAbsolute(std::string_view path) { +VirtualFile VfsDirectory::CreateFileAbsolute(std::string_view path) { if (IsRoot()) { return CreateFileRelative(path); } @@ -332,7 +332,7 @@ std::shared_ptr<VfsFile> VfsDirectory::CreateFileAbsolute(std::string_view path) return GetParentDirectory()->CreateFileAbsolute(path); } -std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryRelative(std::string_view path) { +VirtualDir VfsDirectory::CreateDirectoryRelative(std::string_view path) { auto vec = Common::FS::SplitPathComponents(path); vec.erase(std::remove_if(vec.begin(), vec.end(), [](const auto& str) { return str.empty(); }), vec.end()); @@ -355,7 +355,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryRelative(std::string_ return dir->CreateDirectoryRelative(Common::FS::GetPathWithoutTop(path)); } -std::shared_ptr<VfsDirectory> VfsDirectory::CreateDirectoryAbsolute(std::string_view path) { +VirtualDir VfsDirectory::CreateDirectoryAbsolute(std::string_view path) { if (IsRoot()) { return 
CreateDirectoryRelative(path); } @@ -446,27 +446,27 @@ bool ReadOnlyVfsDirectory::IsReadable() const { return true; } -std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateSubdirectory(std::string_view name) { +VirtualDir ReadOnlyVfsDirectory::CreateSubdirectory(std::string_view name) { return nullptr; } -std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFile(std::string_view name) { +VirtualFile ReadOnlyVfsDirectory::CreateFile(std::string_view name) { return nullptr; } -std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFileAbsolute(std::string_view path) { +VirtualFile ReadOnlyVfsDirectory::CreateFileAbsolute(std::string_view path) { return nullptr; } -std::shared_ptr<VfsFile> ReadOnlyVfsDirectory::CreateFileRelative(std::string_view path) { +VirtualFile ReadOnlyVfsDirectory::CreateFileRelative(std::string_view path) { return nullptr; } -std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateDirectoryAbsolute(std::string_view path) { +VirtualDir ReadOnlyVfsDirectory::CreateDirectoryAbsolute(std::string_view path) { return nullptr; } -std::shared_ptr<VfsDirectory> ReadOnlyVfsDirectory::CreateDirectoryRelative(std::string_view path) { +VirtualDir ReadOnlyVfsDirectory::CreateDirectoryRelative(std::string_view path) { return nullptr; } diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h index 954094772..afd64e95c 100644 --- a/src/core/file_sys/vfs.h +++ b/src/core/file_sys/vfs.h @@ -91,7 +91,7 @@ public: // Resizes the file to new_size. Returns whether or not the operation was successful. virtual bool Resize(std::size_t new_size) = 0; // Gets a pointer to the directory containing this file, returning nullptr if there is none. - virtual std::shared_ptr<VfsDirectory> GetContainingDirectory() const = 0; + virtual VirtualDir GetContainingDirectory() const = 0; // Returns whether or not the file can be written to. 
virtual bool IsWritable() const = 0; @@ -183,27 +183,27 @@ public: // Retrives the file located at path as if the current directory was root. Returns nullptr if // not found. - virtual std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const; + virtual VirtualFile GetFileRelative(std::string_view path) const; // Calls GetFileRelative(path) on the root of the current directory. - virtual std::shared_ptr<VfsFile> GetFileAbsolute(std::string_view path) const; + virtual VirtualFile GetFileAbsolute(std::string_view path) const; // Retrives the directory located at path as if the current directory was root. Returns nullptr // if not found. - virtual std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const; + virtual VirtualDir GetDirectoryRelative(std::string_view path) const; // Calls GetDirectoryRelative(path) on the root of the current directory. - virtual std::shared_ptr<VfsDirectory> GetDirectoryAbsolute(std::string_view path) const; + virtual VirtualDir GetDirectoryAbsolute(std::string_view path) const; // Returns a vector containing all of the files in this directory. - virtual std::vector<std::shared_ptr<VfsFile>> GetFiles() const = 0; + virtual std::vector<VirtualFile> GetFiles() const = 0; // Returns the file with filename matching name. Returns nullptr if directory dosen't have a // file with name. - virtual std::shared_ptr<VfsFile> GetFile(std::string_view name) const; + virtual VirtualFile GetFile(std::string_view name) const; // Returns a vector containing all of the subdirectories in this directory. - virtual std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const = 0; + virtual std::vector<VirtualDir> GetSubdirectories() const = 0; // Returns the directory with name matching name. Returns nullptr if directory dosen't have a // directory with name. 
- virtual std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const; + virtual VirtualDir GetSubdirectory(std::string_view name) const; // Returns whether or not the directory can be written to. virtual bool IsWritable() const = 0; @@ -219,31 +219,31 @@ public: virtual std::size_t GetSize() const; // Returns the parent directory of this directory. Returns nullptr if this directory is root or // has no parent. - virtual std::shared_ptr<VfsDirectory> GetParentDirectory() const = 0; + virtual VirtualDir GetParentDirectory() const = 0; // Creates a new subdirectory with name name. Returns a pointer to the new directory or nullptr // if the operation failed. - virtual std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) = 0; + virtual VirtualDir CreateSubdirectory(std::string_view name) = 0; // Creates a new file with name name. Returns a pointer to the new file or nullptr if the // operation failed. - virtual std::shared_ptr<VfsFile> CreateFile(std::string_view name) = 0; + virtual VirtualFile CreateFile(std::string_view name) = 0; // Creates a new file at the path relative to this directory. Also creates directories if // they do not exist and is supported by this implementation. Returns nullptr on any failure. - virtual std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path); + virtual VirtualFile CreateFileRelative(std::string_view path); // Creates a new file at the path relative to root of this directory. Also creates directories // if they do not exist and is supported by this implementation. Returns nullptr on any failure. - virtual std::shared_ptr<VfsFile> CreateFileAbsolute(std::string_view path); + virtual VirtualFile CreateFileAbsolute(std::string_view path); // Creates a new directory at the path relative to this directory. Also creates directories if // they do not exist and is supported by this implementation. Returns nullptr on any failure. 
- virtual std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path); + virtual VirtualDir CreateDirectoryRelative(std::string_view path); // Creates a new directory at the path relative to root of this directory. Also creates // directories if they do not exist and is supported by this implementation. Returns nullptr on // any failure. - virtual std::shared_ptr<VfsDirectory> CreateDirectoryAbsolute(std::string_view path); + virtual VirtualDir CreateDirectoryAbsolute(std::string_view path); // Deletes the subdirectory with the given name and returns true on success. virtual bool DeleteSubdirectory(std::string_view name) = 0; @@ -280,12 +280,12 @@ class ReadOnlyVfsDirectory : public VfsDirectory { public: bool IsWritable() const override; bool IsReadable() const override; - std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; - std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; - std::shared_ptr<VfsFile> CreateFileAbsolute(std::string_view path) override; - std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path) override; - std::shared_ptr<VfsDirectory> CreateDirectoryAbsolute(std::string_view path) override; - std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path) override; + VirtualDir CreateSubdirectory(std::string_view name) override; + VirtualFile CreateFile(std::string_view name) override; + VirtualFile CreateFileAbsolute(std::string_view path) override; + VirtualFile CreateFileRelative(std::string_view path) override; + VirtualDir CreateDirectoryAbsolute(std::string_view path) override; + VirtualDir CreateDirectoryRelative(std::string_view path) override; bool DeleteSubdirectory(std::string_view name) override; bool DeleteSubdirectoryRecursive(std::string_view name) override; bool CleanSubdirectoryRecursive(std::string_view name) override; diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index e0ff70174..3c5a7d87a 100644 --- 
a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -46,7 +46,7 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> f if (files.size() == 1) return files[0]; - return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name))); } VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, @@ -71,20 +71,23 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, if (files.begin()->first != 0) files.emplace(0, std::make_shared<StaticVfsFile>(filler_byte, files.begin()->first)); - return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name))); + return VirtualFile(new ConcatenatedVfsFile(std::move(files), std::move(name))); } std::string ConcatenatedVfsFile::GetName() const { - if (files.empty()) + if (files.empty()) { return ""; - if (!name.empty()) + } + if (!name.empty()) { return name; + } return files.begin()->second->GetName(); } std::size_t ConcatenatedVfsFile::GetSize() const { - if (files.empty()) + if (files.empty()) { return 0; + } return files.rbegin()->first + files.rbegin()->second->GetSize(); } @@ -92,9 +95,10 @@ bool ConcatenatedVfsFile::Resize(std::size_t new_size) { return false; } -std::shared_ptr<VfsDirectory> ConcatenatedVfsFile::GetContainingDirectory() const { - if (files.empty()) +VirtualDir ConcatenatedVfsFile::GetContainingDirectory() const { + if (files.empty()) { return nullptr; + } return files.begin()->second->GetContainingDirectory(); } diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index 7a26343c0..287c72555 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -31,7 +31,7 @@ public: std::string GetName() const override; std::size_t GetSize() const override; bool Resize(std::size_t new_size) override; - std::shared_ptr<VfsDirectory> GetContainingDirectory() const 
override; + VirtualDir GetContainingDirectory() const override; bool IsWritable() const override; bool IsReadable() const override; std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; diff --git a/src/core/file_sys/vfs_layered.cpp b/src/core/file_sys/vfs_layered.cpp index 338e398da..434b03cec 100644 --- a/src/core/file_sys/vfs_layered.cpp +++ b/src/core/file_sys/vfs_layered.cpp @@ -20,10 +20,10 @@ VirtualDir LayeredVfsDirectory::MakeLayeredDirectory(std::vector<VirtualDir> dir if (dirs.size() == 1) return dirs[0]; - return std::shared_ptr<VfsDirectory>(new LayeredVfsDirectory(std::move(dirs), std::move(name))); + return VirtualDir(new LayeredVfsDirectory(std::move(dirs), std::move(name))); } -std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFileRelative(std::string_view path) const { +VirtualFile LayeredVfsDirectory::GetFileRelative(std::string_view path) const { for (const auto& layer : dirs) { const auto file = layer->GetFileRelative(path); if (file != nullptr) @@ -33,23 +33,23 @@ std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFileRelative(std::string_view p return nullptr; } -std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetDirectoryRelative( - std::string_view path) const { +VirtualDir LayeredVfsDirectory::GetDirectoryRelative(std::string_view path) const { std::vector<VirtualDir> out; for (const auto& layer : dirs) { auto dir = layer->GetDirectoryRelative(path); - if (dir != nullptr) + if (dir != nullptr) { out.push_back(std::move(dir)); + } } return MakeLayeredDirectory(std::move(out)); } -std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFile(std::string_view name) const { +VirtualFile LayeredVfsDirectory::GetFile(std::string_view name) const { return GetFileRelative(name); } -std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetSubdirectory(std::string_view name) const { +VirtualDir LayeredVfsDirectory::GetSubdirectory(std::string_view name) const { return GetDirectoryRelative(name); } @@ -57,7 +57,7 @@ std::string 
LayeredVfsDirectory::GetFullPath() const { return dirs[0]->GetFullPath(); } -std::vector<std::shared_ptr<VfsFile>> LayeredVfsDirectory::GetFiles() const { +std::vector<VirtualFile> LayeredVfsDirectory::GetFiles() const { std::vector<VirtualFile> out; for (const auto& layer : dirs) { for (const auto& file : layer->GetFiles()) { @@ -72,7 +72,7 @@ std::vector<std::shared_ptr<VfsFile>> LayeredVfsDirectory::GetFiles() const { return out; } -std::vector<std::shared_ptr<VfsDirectory>> LayeredVfsDirectory::GetSubdirectories() const { +std::vector<VirtualDir> LayeredVfsDirectory::GetSubdirectories() const { std::vector<std::string> names; for (const auto& layer : dirs) { for (const auto& sd : layer->GetSubdirectories()) { @@ -101,15 +101,15 @@ std::string LayeredVfsDirectory::GetName() const { return name.empty() ? dirs[0]->GetName() : name; } -std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetParentDirectory() const { +VirtualDir LayeredVfsDirectory::GetParentDirectory() const { return dirs[0]->GetParentDirectory(); } -std::shared_ptr<VfsDirectory> LayeredVfsDirectory::CreateSubdirectory(std::string_view name) { +VirtualDir LayeredVfsDirectory::CreateSubdirectory(std::string_view name) { return nullptr; } -std::shared_ptr<VfsFile> LayeredVfsDirectory::CreateFile(std::string_view name) { +VirtualFile LayeredVfsDirectory::CreateFile(std::string_view name) { return nullptr; } diff --git a/src/core/file_sys/vfs_layered.h b/src/core/file_sys/vfs_layered.h index 8a25c3428..6d7513ac6 100644 --- a/src/core/file_sys/vfs_layered.h +++ b/src/core/file_sys/vfs_layered.h @@ -21,20 +21,20 @@ public: /// Wrapper function to allow for more efficient handling of dirs.size() == 0, 1 cases. 
static VirtualDir MakeLayeredDirectory(std::vector<VirtualDir> dirs, std::string name = ""); - std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override; - std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override; - std::shared_ptr<VfsFile> GetFile(std::string_view name) const override; - std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const override; + VirtualFile GetFileRelative(std::string_view path) const override; + VirtualDir GetDirectoryRelative(std::string_view path) const override; + VirtualFile GetFile(std::string_view name) const override; + VirtualDir GetSubdirectory(std::string_view name) const override; std::string GetFullPath() const override; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualFile> GetFiles() const override; + std::vector<VirtualDir> GetSubdirectories() const override; bool IsWritable() const override; bool IsReadable() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; - std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; - std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; + VirtualDir GetParentDirectory() const override; + VirtualDir CreateSubdirectory(std::string_view name) override; + VirtualFile CreateFile(std::string_view name) override; bool DeleteSubdirectory(std::string_view name) override; bool DeleteFile(std::string_view name) override; bool Rename(std::string_view name) override; diff --git a/src/core/file_sys/vfs_offset.cpp b/src/core/file_sys/vfs_offset.cpp index 7714d3de5..056737b54 100644 --- a/src/core/file_sys/vfs_offset.cpp +++ b/src/core/file_sys/vfs_offset.cpp @@ -9,7 +9,7 @@ namespace FileSys { -OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, std::size_t size_, std::size_t offset_, 
+OffsetVfsFile::OffsetVfsFile(VirtualFile file_, std::size_t size_, std::size_t offset_, std::string name_, VirtualDir parent_) : file(file_), offset(offset_), size(size_), name(std::move(name_)), parent(parent_ == nullptr ? file->GetContainingDirectory() : std::move(parent_)) {} @@ -37,7 +37,7 @@ bool OffsetVfsFile::Resize(std::size_t new_size) { return true; } -std::shared_ptr<VfsDirectory> OffsetVfsFile::GetContainingDirectory() const { +VirtualDir OffsetVfsFile::GetContainingDirectory() const { return parent; } diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h index f7b7a3256..b2ccc5c7b 100644 --- a/src/core/file_sys/vfs_offset.h +++ b/src/core/file_sys/vfs_offset.h @@ -17,14 +17,14 @@ namespace FileSys { // the size of this wrapper. class OffsetVfsFile : public VfsFile { public: - OffsetVfsFile(std::shared_ptr<VfsFile> file, std::size_t size, std::size_t offset = 0, + OffsetVfsFile(VirtualFile file, std::size_t size, std::size_t offset = 0, std::string new_name = "", VirtualDir new_parent = nullptr); ~OffsetVfsFile() override; std::string GetName() const override; std::size_t GetSize() const override; bool Resize(std::size_t new_size) override; - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; + VirtualDir GetContainingDirectory() const override; bool IsWritable() const override; bool IsReadable() const override; std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; @@ -42,7 +42,7 @@ public: private: std::size_t TrimToFit(std::size_t r_size, std::size_t r_offset) const; - std::shared_ptr<VfsFile> file; + VirtualFile file; std::size_t offset; std::size_t size; std::string name; diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp index 488687ba9..a287eebe3 100644 --- a/src/core/file_sys/vfs_real.cpp +++ b/src/core/file_sys/vfs_real.cpp @@ -263,7 +263,7 @@ bool RealVfsFile::Resize(std::size_t new_size) { return backing->Resize(new_size); } 
-std::shared_ptr<VfsDirectory> RealVfsFile::GetContainingDirectory() const { +VirtualDir RealVfsFile::GetContainingDirectory() const { return base.OpenDirectory(parent_path, perms); } @@ -352,7 +352,7 @@ RealVfsDirectory::RealVfsDirectory(RealVfsFilesystem& base_, const std::string& RealVfsDirectory::~RealVfsDirectory() = default; -std::shared_ptr<VfsFile> RealVfsDirectory::GetFileRelative(std::string_view path) const { +VirtualFile RealVfsDirectory::GetFileRelative(std::string_view path) const { const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); if (!FS::Exists(full_path) || FS::IsDirectory(full_path)) { return nullptr; @@ -360,7 +360,7 @@ std::shared_ptr<VfsFile> RealVfsDirectory::GetFileRelative(std::string_view path return base.OpenFile(full_path, perms); } -std::shared_ptr<VfsDirectory> RealVfsDirectory::GetDirectoryRelative(std::string_view path) const { +VirtualDir RealVfsDirectory::GetDirectoryRelative(std::string_view path) const { const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); if (!FS::Exists(full_path) || !FS::IsDirectory(full_path)) { return nullptr; @@ -368,20 +368,20 @@ std::shared_ptr<VfsDirectory> RealVfsDirectory::GetDirectoryRelative(std::string return base.OpenDirectory(full_path, perms); } -std::shared_ptr<VfsFile> RealVfsDirectory::GetFile(std::string_view name) const { +VirtualFile RealVfsDirectory::GetFile(std::string_view name) const { return GetFileRelative(name); } -std::shared_ptr<VfsDirectory> RealVfsDirectory::GetSubdirectory(std::string_view name) const { +VirtualDir RealVfsDirectory::GetSubdirectory(std::string_view name) const { return GetDirectoryRelative(name); } -std::shared_ptr<VfsFile> RealVfsDirectory::CreateFileRelative(std::string_view path) { +VirtualFile RealVfsDirectory::CreateFileRelative(std::string_view path) { const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); return base.CreateFile(full_path, perms); } 
-std::shared_ptr<VfsDirectory> RealVfsDirectory::CreateDirectoryRelative(std::string_view path) { +VirtualDir RealVfsDirectory::CreateDirectoryRelative(std::string_view path) { const auto full_path = FS::SanitizePath(this->path + DIR_SEP + std::string(path)); return base.CreateDirectory(full_path, perms); } @@ -391,11 +391,11 @@ bool RealVfsDirectory::DeleteSubdirectoryRecursive(std::string_view name) { return base.DeleteDirectory(full_path); } -std::vector<std::shared_ptr<VfsFile>> RealVfsDirectory::GetFiles() const { +std::vector<VirtualFile> RealVfsDirectory::GetFiles() const { return IterateEntries<RealVfsFile, VfsFile>(); } -std::vector<std::shared_ptr<VfsDirectory>> RealVfsDirectory::GetSubdirectories() const { +std::vector<VirtualDir> RealVfsDirectory::GetSubdirectories() const { return IterateEntries<RealVfsDirectory, VfsDirectory>(); } @@ -411,7 +411,7 @@ std::string RealVfsDirectory::GetName() const { return path_components.back(); } -std::shared_ptr<VfsDirectory> RealVfsDirectory::GetParentDirectory() const { +VirtualDir RealVfsDirectory::GetParentDirectory() const { if (path_components.size() <= 1) { return nullptr; } @@ -419,12 +419,12 @@ std::shared_ptr<VfsDirectory> RealVfsDirectory::GetParentDirectory() const { return base.OpenDirectory(parent_path, perms); } -std::shared_ptr<VfsDirectory> RealVfsDirectory::CreateSubdirectory(std::string_view name) { +VirtualDir RealVfsDirectory::CreateSubdirectory(std::string_view name) { const std::string subdir_path = (path + DIR_SEP).append(name); return base.CreateDirectory(subdir_path, perms); } -std::shared_ptr<VfsFile> RealVfsDirectory::CreateFile(std::string_view name) { +VirtualFile RealVfsDirectory::CreateFile(std::string_view name) { const std::string file_path = (path + DIR_SEP).append(name); return base.CreateFile(file_path, perms); } diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h index 0b537b22c..23e99865e 100644 --- a/src/core/file_sys/vfs_real.h +++ 
b/src/core/file_sys/vfs_real.h @@ -50,7 +50,7 @@ public: std::string GetName() const override; std::size_t GetSize() const override; bool Resize(std::size_t new_size) override; - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; + VirtualDir GetContainingDirectory() const override; bool IsWritable() const override; bool IsReadable() const override; std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; @@ -79,21 +79,21 @@ class RealVfsDirectory : public VfsDirectory { public: ~RealVfsDirectory() override; - std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override; - std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override; - std::shared_ptr<VfsFile> GetFile(std::string_view name) const override; - std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const override; - std::shared_ptr<VfsFile> CreateFileRelative(std::string_view path) override; - std::shared_ptr<VfsDirectory> CreateDirectoryRelative(std::string_view path) override; + VirtualFile GetFileRelative(std::string_view path) const override; + VirtualDir GetDirectoryRelative(std::string_view path) const override; + VirtualFile GetFile(std::string_view name) const override; + VirtualDir GetSubdirectory(std::string_view name) const override; + VirtualFile CreateFileRelative(std::string_view path) override; + VirtualDir CreateDirectoryRelative(std::string_view path) override; bool DeleteSubdirectoryRecursive(std::string_view name) override; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualFile> GetFiles() const override; + std::vector<VirtualDir> GetSubdirectories() const override; bool IsWritable() const override; bool IsReadable() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; - 
std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; - std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; + VirtualDir GetParentDirectory() const override; + VirtualDir CreateSubdirectory(std::string_view name) override; + VirtualFile CreateFile(std::string_view name) override; bool DeleteSubdirectory(std::string_view name) override; bool DeleteFile(std::string_view name) override; bool Rename(std::string_view name) override; diff --git a/src/core/file_sys/vfs_static.h b/src/core/file_sys/vfs_static.h index 8b27c30fa..c840b24b9 100644 --- a/src/core/file_sys/vfs_static.h +++ b/src/core/file_sys/vfs_static.h @@ -31,7 +31,7 @@ public: return true; } - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override { + VirtualDir GetContainingDirectory() const override { return parent; } diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp index 75fc04302..c1ec1e645 100644 --- a/src/core/file_sys/vfs_vector.cpp +++ b/src/core/file_sys/vfs_vector.cpp @@ -25,7 +25,7 @@ bool VectorVfsFile::Resize(size_t new_size) { return true; } -std::shared_ptr<VfsDirectory> VectorVfsFile::GetContainingDirectory() const { +VirtualDir VectorVfsFile::GetContainingDirectory() const { return parent; } @@ -68,11 +68,11 @@ VectorVfsDirectory::VectorVfsDirectory(std::vector<VirtualFile> files_, VectorVfsDirectory::~VectorVfsDirectory() = default; -std::vector<std::shared_ptr<VfsFile>> VectorVfsDirectory::GetFiles() const { +std::vector<VirtualFile> VectorVfsDirectory::GetFiles() const { return files; } -std::vector<std::shared_ptr<VfsDirectory>> VectorVfsDirectory::GetSubdirectories() const { +std::vector<VirtualDir> VectorVfsDirectory::GetSubdirectories() const { return dirs; } @@ -88,7 +88,7 @@ std::string VectorVfsDirectory::GetName() const { return name; } -std::shared_ptr<VfsDirectory> VectorVfsDirectory::GetParentDirectory() const { +VirtualDir VectorVfsDirectory::GetParentDirectory() const { return parent; 
} @@ -116,11 +116,11 @@ bool VectorVfsDirectory::Rename(std::string_view name_) { return true; } -std::shared_ptr<VfsDirectory> VectorVfsDirectory::CreateSubdirectory(std::string_view name) { +VirtualDir VectorVfsDirectory::CreateSubdirectory(std::string_view name) { return nullptr; } -std::shared_ptr<VfsFile> VectorVfsDirectory::CreateFile(std::string_view name) { +VirtualFile VectorVfsDirectory::CreateFile(std::string_view name) { return nullptr; } diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h index 95d3da2f2..2aff9ca34 100644 --- a/src/core/file_sys/vfs_vector.h +++ b/src/core/file_sys/vfs_vector.h @@ -17,9 +17,9 @@ namespace FileSys { template <std::size_t size> class ArrayVfsFile : public VfsFile { public: - explicit ArrayVfsFile(const std::array<u8, size>& data, std::string name = "", - VirtualDir parent = nullptr) - : data(data), name(std::move(name)), parent(std::move(parent)) {} + explicit ArrayVfsFile(const std::array<u8, size>& data_, std::string name_ = "", + VirtualDir parent_ = nullptr) + : data(data_), name(std::move(name_)), parent(std::move(parent_)) {} std::string GetName() const override { return name; @@ -33,7 +33,7 @@ public: return false; } - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override { + VirtualDir GetContainingDirectory() const override { return parent; } @@ -51,12 +51,12 @@ public: return read; } - std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override { + std::size_t Write(const u8* data_, std::size_t length, std::size_t offset) override { return 0; } - bool Rename(std::string_view name) override { - this->name = name; + bool Rename(std::string_view new_name) override { + name = new_name; return true; } @@ -82,7 +82,7 @@ public: std::string GetName() const override; std::size_t GetSize() const override; bool Resize(std::size_t new_size) override; - std::shared_ptr<VfsDirectory> GetContainingDirectory() const override; + VirtualDir GetContainingDirectory() 
const override; bool IsWritable() const override; bool IsReadable() const override; std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override; @@ -106,17 +106,17 @@ public: VirtualDir parent = nullptr); ~VectorVfsDirectory() override; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualFile> GetFiles() const override; + std::vector<VirtualDir> GetSubdirectories() const override; bool IsWritable() const override; bool IsReadable() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; + VirtualDir GetParentDirectory() const override; bool DeleteSubdirectory(std::string_view name) override; bool DeleteFile(std::string_view name) override; bool Rename(std::string_view name) override; - std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; - std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; + VirtualDir CreateSubdirectory(std::string_view name) override; + VirtualFile CreateFile(std::string_view name) override; virtual void AddFile(VirtualFile file); virtual void AddDirectory(VirtualDir dir); diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp index 24c58e7ae..814fd5680 100644 --- a/src/core/file_sys/xts_archive.cpp +++ b/src/core/file_sys/xts_archive.cpp @@ -152,11 +152,11 @@ NAXContentType NAX::GetContentType() const { return type; } -std::vector<std::shared_ptr<VfsFile>> NAX::GetFiles() const { +std::vector<VirtualFile> NAX::GetFiles() const { return {dec_file}; } -std::vector<std::shared_ptr<VfsDirectory>> NAX::GetSubdirectories() const { +std::vector<VirtualDir> NAX::GetSubdirectories() const { return {}; } @@ -164,7 +164,7 @@ std::string NAX::GetName() const { return file->GetName(); } -std::shared_ptr<VfsDirectory> NAX::GetParentDirectory() const { +VirtualDir 
NAX::GetParentDirectory() const { return file->GetContainingDirectory(); } diff --git a/src/core/file_sys/xts_archive.h b/src/core/file_sys/xts_archive.h index c472e226e..63a032b68 100644 --- a/src/core/file_sys/xts_archive.h +++ b/src/core/file_sys/xts_archive.h @@ -47,13 +47,13 @@ public: NAXContentType GetContentType() const; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; + std::vector<VirtualFile> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualDir> GetSubdirectories() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; + VirtualDir GetParentDirectory() const override; private: Loader::ResultStatus Parse(std::string_view path); diff --git a/src/core/frontend/applets/error.cpp b/src/core/frontend/applets/error.cpp index 4002a9211..dceb20ff8 100644 --- a/src/core/frontend/applets/error.cpp +++ b/src/core/frontend/applets/error.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/logging/log.h" #include "core/frontend/applets/error.h" namespace Core::Frontend { @@ -10,7 +11,7 @@ ErrorApplet::~ErrorApplet() = default; void DefaultErrorApplet::ShowError(ResultCode error, std::function<void()> finished) const { LOG_CRITICAL(Service_Fatal, "Application requested error display: {:04}-{:04} (raw={:08X})", - static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); + error.module.Value(), error.description.Value(), error.raw); } void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, @@ -18,7 +19,7 @@ void DefaultErrorApplet::ShowErrorWithTimestamp(ResultCode error, std::chrono::s LOG_CRITICAL( Service_Fatal, "Application requested error display: {:04X}-{:04X} (raw={:08X}) with timestamp={:016X}", - static_cast<u32>(error.module.Value()), error.description.Value(), error.raw, time.count()); + error.module.Value(), error.description.Value(), error.raw, time.count()); } void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_text, @@ -26,7 +27,7 @@ void DefaultErrorApplet::ShowCustomErrorText(ResultCode error, std::string main_ std::function<void()> finished) const { LOG_CRITICAL(Service_Fatal, "Application requested custom error with error_code={:04X}-{:04X} (raw={:08X})", - static_cast<u32>(error.module.Value()), error.description.Value(), error.raw); + error.module.Value(), error.description.Value(), error.raw); LOG_CRITICAL(Service_Fatal, " Main Text: {}", main_text); LOG_CRITICAL(Service_Fatal, " Detail Text: {}", detail_text); } diff --git a/src/core/frontend/applets/general_frontend.cpp b/src/core/frontend/applets/general_frontend.cpp index c30b36de7..7483ffb76 100644 --- a/src/core/frontend/applets/general_frontend.cpp +++ b/src/core/frontend/applets/general_frontend.cpp @@ -53,72 +53,4 @@ void DefaultPhotoViewerApplet::ShowAllPhotos(std::function<void()> finished) con finished(); } -ECommerceApplet::~ECommerceApplet() = default; - 
-DefaultECommerceApplet::~DefaultECommerceApplet() = default; - -void DefaultECommerceApplet::ShowApplicationInformation( - std::function<void()> finished, u64 title_id, std::optional<u128> user_id, - std::optional<bool> full_display, std::optional<std::string> extra_parameter) { - const auto value = user_id.value_or(u128{}); - LOG_INFO(Service_AM, - "Application requested frontend show application information for EShop, " - "title_id={:016X}, user_id={:016X}{:016X}, full_display={}, extra_parameter={}", - title_id, value[1], value[0], - full_display.has_value() ? fmt::format("{}", *full_display) : "null", - extra_parameter.value_or("null")); - finished(); -} - -void DefaultECommerceApplet::ShowAddOnContentList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id, - std::optional<bool> full_display) { - const auto value = user_id.value_or(u128{}); - LOG_INFO(Service_AM, - "Application requested frontend show add on content list for EShop, " - "title_id={:016X}, user_id={:016X}{:016X}, full_display={}", - title_id, value[1], value[0], - full_display.has_value() ? 
fmt::format("{}", *full_display) : "null"); - finished(); -} - -void DefaultECommerceApplet::ShowSubscriptionList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id) { - const auto value = user_id.value_or(u128{}); - LOG_INFO(Service_AM, - "Application requested frontend show subscription list for EShop, title_id={:016X}, " - "user_id={:016X}{:016X}", - title_id, value[1], value[0]); - finished(); -} - -void DefaultECommerceApplet::ShowConsumableItemList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id) { - const auto value = user_id.value_or(u128{}); - LOG_INFO( - Service_AM, - "Application requested frontend show consumable item list for EShop, title_id={:016X}, " - "user_id={:016X}{:016X}", - title_id, value[1], value[0]); - finished(); -} - -void DefaultECommerceApplet::ShowShopHome(std::function<void()> finished, u128 user_id, - bool full_display) { - LOG_INFO(Service_AM, - "Application requested frontend show home menu for EShop, user_id={:016X}{:016X}, " - "full_display={}", - user_id[1], user_id[0], full_display); - finished(); -} - -void DefaultECommerceApplet::ShowSettings(std::function<void()> finished, u128 user_id, - bool full_display) { - LOG_INFO(Service_AM, - "Application requested frontend show settings menu for EShop, user_id={:016X}{:016X}, " - "full_display={}", - user_id[1], user_id[0], full_display); - finished(); -} - } // namespace Core::Frontend diff --git a/src/core/frontend/applets/general_frontend.h b/src/core/frontend/applets/general_frontend.h index 4b63f828e..b713b14ee 100644 --- a/src/core/frontend/applets/general_frontend.h +++ b/src/core/frontend/applets/general_frontend.h @@ -58,55 +58,4 @@ public: void ShowAllPhotos(std::function<void()> finished) const override; }; -class ECommerceApplet { -public: - virtual ~ECommerceApplet(); - - // Shows a page with application icons, description, name, and price. 
- virtual void ShowApplicationInformation(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id = {}, - std::optional<bool> full_display = {}, - std::optional<std::string> extra_parameter = {}) = 0; - - // Shows a page with all of the add on content available for a game, with name, description, and - // price. - virtual void ShowAddOnContentList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id = {}, - std::optional<bool> full_display = {}) = 0; - - // Shows a page with all of the subscriptions (recurring payments) for a game, with name, - // description, price, and renewal period. - virtual void ShowSubscriptionList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id = {}) = 0; - - // Shows a page with a list of any additional game related purchasable items (DLC, - // subscriptions, etc) for a particular game, with name, description, type, and price. - virtual void ShowConsumableItemList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id = {}) = 0; - - // Shows the home page of the shop. - virtual void ShowShopHome(std::function<void()> finished, u128 user_id, bool full_display) = 0; - - // Shows the user settings page of the shop. 
- virtual void ShowSettings(std::function<void()> finished, u128 user_id, bool full_display) = 0; -}; - -class DefaultECommerceApplet : public ECommerceApplet { -public: - ~DefaultECommerceApplet() override; - - void ShowApplicationInformation(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id, std::optional<bool> full_display, - std::optional<std::string> extra_parameter) override; - void ShowAddOnContentList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id, - std::optional<bool> full_display) override; - void ShowSubscriptionList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id) override; - void ShowConsumableItemList(std::function<void()> finished, u64 title_id, - std::optional<u128> user_id) override; - void ShowShopHome(std::function<void()> finished, u128 user_id, bool full_display) override; - void ShowSettings(std::function<void()> finished, u128 user_id, bool full_display) override; -}; - } // namespace Core::Frontend diff --git a/src/core/frontend/applets/web_browser.cpp b/src/core/frontend/applets/web_browser.cpp index 528295ffc..50db6a654 100644 --- a/src/core/frontend/applets/web_browser.cpp +++ b/src/core/frontend/applets/web_browser.cpp @@ -11,14 +11,22 @@ WebBrowserApplet::~WebBrowserApplet() = default; DefaultWebBrowserApplet::~DefaultWebBrowserApplet() = default; -void DefaultWebBrowserApplet::OpenPageLocal(std::string_view filename, - std::function<void()> unpack_romfs_callback, - std::function<void()> finished_callback) { - LOG_INFO(Service_AM, - "(STUBBED) called - No suitable web browser implementation found to open website page " - "at '{}'!", - filename); - finished_callback(); +void DefaultWebBrowserApplet::OpenLocalWebPage( + std::string_view local_url, std::function<void()> extract_romfs_callback, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const { + LOG_WARNING(Service_AM, "(STUBBED) called, backend requested to open local 
web page at {}", + local_url); + + callback(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/"); +} + +void DefaultWebBrowserApplet::OpenExternalWebPage( + std::string_view external_url, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const { + LOG_WARNING(Service_AM, "(STUBBED) called, backend requested to open external web page at {}", + external_url); + + callback(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/"); } } // namespace Core::Frontend diff --git a/src/core/frontend/applets/web_browser.h b/src/core/frontend/applets/web_browser.h index 110e33bc4..1c5ef19a9 100644 --- a/src/core/frontend/applets/web_browser.h +++ b/src/core/frontend/applets/web_browser.h @@ -7,22 +7,34 @@ #include <functional> #include <string_view> +#include "core/hle/service/am/applets/web_types.h" + namespace Core::Frontend { class WebBrowserApplet { public: virtual ~WebBrowserApplet(); - virtual void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, - std::function<void()> finished_callback) = 0; + virtual void OpenLocalWebPage( + std::string_view local_url, std::function<void()> extract_romfs_callback, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const = 0; + + virtual void OpenExternalWebPage( + std::string_view external_url, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback) const = 0; }; class DefaultWebBrowserApplet final : public WebBrowserApplet { public: ~DefaultWebBrowserApplet() override; - void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, - std::function<void()> finished_callback) override; + void OpenLocalWebPage(std::string_view local_url, std::function<void()> extract_romfs_callback, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> + callback) const override; + + void OpenExternalWebPage(std::string_view external_url, + 
std::function<void(Service::AM::Applets::WebExitReason, std::string)> + callback) const override; }; } // namespace Core::Frontend diff --git a/src/core/frontend/input_interpreter.cpp b/src/core/frontend/input_interpreter.cpp new file mode 100644 index 000000000..66ae506cd --- /dev/null +++ b/src/core/frontend/input_interpreter.cpp @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/frontend/input_interpreter.h" +#include "core/hle/service/hid/controllers/npad.h" +#include "core/hle/service/hid/hid.h" +#include "core/hle/service/sm/sm.h" + +InputInterpreter::InputInterpreter(Core::System& system) + : npad{system.ServiceManager() + .GetService<Service::HID::Hid>("hid") + ->GetAppletResource() + ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)} {} + +InputInterpreter::~InputInterpreter() = default; + +void InputInterpreter::PollInput() { + const u32 button_state = npad.GetAndResetPressState(); + + previous_index = current_index; + current_index = (current_index + 1) % button_states.size(); + + button_states[current_index] = button_state; +} + +bool InputInterpreter::IsButtonPressedOnce(HIDButton button) const { + const bool current_press = + (button_states[current_index] & (1U << static_cast<u8>(button))) != 0; + const bool previous_press = + (button_states[previous_index] & (1U << static_cast<u8>(button))) != 0; + + return current_press && !previous_press; +} + +bool InputInterpreter::IsButtonHeld(HIDButton button) const { + u32 held_buttons{button_states[0]}; + + for (std::size_t i = 1; i < button_states.size(); ++i) { + held_buttons &= button_states[i]; + } + + return (held_buttons & (1U << static_cast<u8>(button))) != 0; +} diff --git a/src/core/frontend/input_interpreter.h b/src/core/frontend/input_interpreter.h new file mode 100644 index 000000000..fea9aebe6 --- /dev/null +++ 
b/src/core/frontend/input_interpreter.h @@ -0,0 +1,120 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/common_types.h" + +namespace Core { +class System; +} + +namespace Service::HID { +class Controller_NPad; +} + +enum class HIDButton : u8 { + A, + B, + X, + Y, + LStick, + RStick, + L, + R, + ZL, + ZR, + Plus, + Minus, + + DLeft, + DUp, + DRight, + DDown, + + LStickLeft, + LStickUp, + LStickRight, + LStickDown, + + RStickLeft, + RStickUp, + RStickRight, + RStickDown, + + LeftSL, + LeftSR, + + RightSL, + RightSR, +}; + +/** + * The InputInterpreter class interfaces with HID to retrieve button press states. + * Input is intended to be polled every 50ms so that a button is considered to be + * held down after 400ms has elapsed since the initial button press and subsequent + * repeated presses occur every 50ms. + */ +class InputInterpreter { +public: + explicit InputInterpreter(Core::System& system); + virtual ~InputInterpreter(); + + /// Gets a button state from HID and inserts it into the array of button states. + void PollInput(); + + /** + * The specified button is considered to be pressed once + * if it is currently pressed and not pressed previously. + * + * @param button The button to check. + * + * @returns True when the button is pressed once. + */ + [[nodiscard]] bool IsButtonPressedOnce(HIDButton button) const; + + /** + * Checks whether any of the buttons in the parameter list is pressed once. + * + * @tparam HIDButton The buttons to check. + * + * @returns True when at least one of the buttons is pressed once. + */ + template <HIDButton... T> + [[nodiscard]] bool IsAnyButtonPressedOnce() { + return (IsButtonPressedOnce(T) || ...); + } + + /** + * The specified button is considered to be held down if it is pressed in all 9 button states. + * + * @param button The button to check. 
+ * + * @returns True when the button is held down. + */ + [[nodiscard]] bool IsButtonHeld(HIDButton button) const; + + /** + * Checks whether any of the buttons in the parameter list is held down. + * + * @tparam HIDButton The buttons to check. + * + * @returns True when at least one of the buttons is held down. + */ + template <HIDButton... T> + [[nodiscard]] bool IsAnyButtonHeld() { + return (IsButtonHeld(T) || ...); + } + +private: + Service::HID::Controller_NPad& npad; + + /// Stores 9 consecutive button states polled from HID. + std::array<u32, 9> button_states{}; + + std::size_t previous_index{}; + std::size_t current_index{}; +}; diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index d57776ce9..56cc911d1 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -166,8 +166,23 @@ public: ValidateHeader(); } + void PushImpl(s8 value); + void PushImpl(s16 value); + void PushImpl(s32 value); + void PushImpl(s64 value); + void PushImpl(u8 value); + void PushImpl(u16 value); + void PushImpl(u32 value); + void PushImpl(u64 value); + void PushImpl(float value); + void PushImpl(double value); + void PushImpl(bool value); + void PushImpl(ResultCode value); + template <typename T> - void Push(T value); + void Push(T value) { + return PushImpl(value); + } template <typename First, typename... Other> void Push(const First& first_value, const Other&... 
other_values); @@ -215,13 +230,11 @@ private: /// Push /// -template <> -inline void ResponseBuilder::Push(s32 value) { +inline void ResponseBuilder::PushImpl(s32 value) { cmdbuf[index++] = static_cast<u32>(value); } -template <> -inline void ResponseBuilder::Push(u32 value) { +inline void ResponseBuilder::PushImpl(u32 value) { cmdbuf[index++] = value; } @@ -233,62 +246,52 @@ void ResponseBuilder::PushRaw(const T& value) { index += (sizeof(T) + 3) / 4; // round up to word length } -template <> -inline void ResponseBuilder::Push(ResultCode value) { +inline void ResponseBuilder::PushImpl(ResultCode value) { // Result codes are actually 64-bit in the IPC buffer, but only the high part is discarded. Push(value.raw); Push<u32>(0); } -template <> -inline void ResponseBuilder::Push(s8 value) { +inline void ResponseBuilder::PushImpl(s8 value) { PushRaw(value); } -template <> -inline void ResponseBuilder::Push(s16 value) { +inline void ResponseBuilder::PushImpl(s16 value) { PushRaw(value); } -template <> -inline void ResponseBuilder::Push(s64 value) { - Push(static_cast<u32>(value)); - Push(static_cast<u32>(value >> 32)); +inline void ResponseBuilder::PushImpl(s64 value) { + PushImpl(static_cast<u32>(value)); + PushImpl(static_cast<u32>(value >> 32)); } -template <> -inline void ResponseBuilder::Push(u8 value) { +inline void ResponseBuilder::PushImpl(u8 value) { PushRaw(value); } -template <> -inline void ResponseBuilder::Push(u16 value) { +inline void ResponseBuilder::PushImpl(u16 value) { PushRaw(value); } -template <> -inline void ResponseBuilder::Push(u64 value) { - Push(static_cast<u32>(value)); - Push(static_cast<u32>(value >> 32)); +inline void ResponseBuilder::PushImpl(u64 value) { + PushImpl(static_cast<u32>(value)); + PushImpl(static_cast<u32>(value >> 32)); } -template <> -inline void ResponseBuilder::Push(float value) { +inline void ResponseBuilder::PushImpl(float value) { u32 integral; std::memcpy(&integral, &value, sizeof(u32)); - Push(integral); + 
PushImpl(integral); } -template <> -inline void ResponseBuilder::Push(double value) { +inline void ResponseBuilder::PushImpl(double value) { u64 integral; std::memcpy(&integral, &value, sizeof(u64)); - Push(integral); + PushImpl(integral); } -template <> -inline void ResponseBuilder::Push(bool value) { - Push(static_cast<u8>(value)); +inline void ResponseBuilder::PushImpl(bool value) { + PushImpl(static_cast<u8>(value)); } template <typename First, typename... Other> diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 048acd30e..20ffa7d47 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -12,8 +12,9 @@ #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" @@ -58,7 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v } ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); const std::vector<std::shared_ptr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address); WakeThreads(waiting_threads, num_to_wake); @@ -67,7 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); auto& memory = system.Memory(); // Ensure that we can write to the address. 
@@ -92,7 +93,7 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); auto& memory = system.Memory(); // Ensure that we can write to the address. @@ -153,11 +154,11 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6 bool should_decrement) { auto& memory = system.Memory(); auto& kernel = system.Kernel(); - Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); + Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); Handle event_handle = InvalidHandle; { - SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); if (current_thread->IsPendingTermination()) { lock.CancelSleep(); @@ -210,7 +211,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6 } { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (current_thread->IsWaitingForArbitration()) { RemoveThread(SharedFrom(current_thread)); current_thread->WaitForArbitration(false); @@ -223,11 +224,11 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6 ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) { auto& memory = system.Memory(); auto& kernel = system.Kernel(); - Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); + Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); Handle event_handle = InvalidHandle; { - SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); if (current_thread->IsPendingTermination()) { lock.CancelSleep(); @@ -265,7 +266,7 @@ ResultCode 
AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t } { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (current_thread->IsWaitingForArbitration()) { RemoveThread(SharedFrom(current_thread)); current_thread->WaitForArbitration(false); diff --git a/src/core/hle/kernel/global_scheduler_context.cpp b/src/core/hle/kernel/global_scheduler_context.cpp new file mode 100644 index 000000000..a133e8ed0 --- /dev/null +++ b/src/core/hle/kernel/global_scheduler_context.cpp @@ -0,0 +1,52 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <mutex> + +#include "common/assert.h" +#include "core/core.h" +#include "core/hle/kernel/global_scheduler_context.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/kernel.h" + +namespace Kernel { + +GlobalSchedulerContext::GlobalSchedulerContext(KernelCore& kernel) + : kernel{kernel}, scheduler_lock{kernel} {} + +GlobalSchedulerContext::~GlobalSchedulerContext() = default; + +void GlobalSchedulerContext::AddThread(std::shared_ptr<Thread> thread) { + std::scoped_lock lock{global_list_guard}; + thread_list.push_back(std::move(thread)); +} + +void GlobalSchedulerContext::RemoveThread(std::shared_ptr<Thread> thread) { + std::scoped_lock lock{global_list_guard}; + thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), + thread_list.end()); +} + +void GlobalSchedulerContext::PreemptThreads() { + // The priority levels at which the global scheduler preempts threads every 10 ms. They are + // ordered from Core 0 to Core 3. 
+ static constexpr std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities{ + 59, + 59, + 59, + 63, + }; + + ASSERT(IsLocked()); + for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { + const u32 priority = preemption_priorities[core_id]; + kernel.Scheduler(core_id).RotateScheduledQueue(core_id, priority); + } +} + +bool GlobalSchedulerContext::IsLocked() const { + return scheduler_lock.IsLockedByCurrentThread(); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/global_scheduler_context.h b/src/core/hle/kernel/global_scheduler_context.h new file mode 100644 index 000000000..5c7b89290 --- /dev/null +++ b/src/core/hle/kernel/global_scheduler_context.h @@ -0,0 +1,81 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <vector> + +#include "common/common_types.h" +#include "common/spin_lock.h" +#include "core/hardware_properties.h" +#include "core/hle/kernel/k_priority_queue.h" +#include "core/hle/kernel/k_scheduler_lock.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +class KernelCore; +class SchedulerLock; + +using KSchedulerPriorityQueue = + KPriorityQueue<Thread, Core::Hardware::NUM_CPU_CORES, THREADPRIO_LOWEST, THREADPRIO_HIGHEST>; +constexpr s32 HighestCoreMigrationAllowedPriority = 2; + +class GlobalSchedulerContext final { + friend class KScheduler; + +public: + using LockType = KAbstractSchedulerLock<KScheduler>; + + explicit GlobalSchedulerContext(KernelCore& kernel); + ~GlobalSchedulerContext(); + + /// Adds a new thread to the scheduler + void AddThread(std::shared_ptr<Thread> thread); + + /// Removes a thread from the scheduler + void RemoveThread(std::shared_ptr<Thread> thread); + + /// Returns a list of all threads managed by the scheduler + [[nodiscard]] const std::vector<std::shared_ptr<Thread>>& GetThreadList() const { + return thread_list; + } + + /** + * Rotates 
the scheduling queues of threads at a preemption priority and then does + * some core rebalancing. Preemption priorities can be found in the array + * 'preemption_priorities'. + * + * @note This operation happens every 10ms. + */ + void PreemptThreads(); + + /// Returns true if the global scheduler lock is acquired + bool IsLocked() const; + + [[nodiscard]] LockType& SchedulerLock() { + return scheduler_lock; + } + + [[nodiscard]] const LockType& SchedulerLock() const { + return scheduler_lock; + } + +private: + friend class KScopedSchedulerLock; + friend class KScopedSchedulerLockAndSleep; + + KernelCore& kernel; + + std::atomic_bool scheduler_update_needed{}; + KSchedulerPriorityQueue priority_queue; + LockType scheduler_lock; + + /// Lists all thread ids that aren't deleted/etc. + std::vector<std::shared_ptr<Thread>> thread_list; + Common::SpinLock global_list_guard{}; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp index 3e745c18b..40988b0fd 100644 --- a/src/core/hle/kernel/handle_table.cpp +++ b/src/core/hle/kernel/handle_table.cpp @@ -8,9 +8,9 @@ #include "core/core.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" namespace Kernel { @@ -105,7 +105,7 @@ bool HandleTable::IsValid(Handle handle) const { std::shared_ptr<Object> HandleTable::GetGeneric(Handle handle) const { if (handle == CurrentThread) { - return SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); + return SharedFrom(kernel.CurrentScheduler()->GetCurrentThread()); } else if (handle == CurrentProcess) { return SharedFrom(kernel.CurrentProcess()); } diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 81f85643b..83decf6cf 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ 
b/src/core/hle/kernel/hle_ipc.cpp @@ -17,11 +17,12 @@ #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/hle_ipc.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" @@ -45,44 +46,6 @@ void SessionRequestHandler::ClientDisconnected( boost::range::remove_erase(connected_sessions, server_session); } -std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread( - const std::string& reason, u64 timeout, WakeupCallback&& callback, - std::shared_ptr<WritableEvent> writable_event) { - // Put the client thread to sleep until the wait event is signaled or the timeout expires. - - if (!writable_event) { - // Create event if not provided - const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason); - writable_event = pair.writable; - } - - { - Handle event_handle = InvalidHandle; - SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout); - thread->SetHLECallback( - [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool { - ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT - ? 
ThreadWakeupReason::Timeout - : ThreadWakeupReason::Signal; - callback(thread, context, reason); - context.WriteToOutgoingCommandBuffer(*thread); - return true; - }); - const auto readable_event{writable_event->GetReadableEvent()}; - writable_event->Clear(); - thread->SetHLESyncObject(readable_event.get()); - thread->SetStatus(ThreadStatus::WaitHLEEvent); - thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); - readable_event->AddWaitingThread(thread); - lock.Release(); - thread->SetHLETimeEvent(event_handle); - } - - is_thread_waiting = true; - - return writable_event; -} - HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory, std::shared_ptr<ServerSession> server_session, std::shared_ptr<Thread> thread) diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index c31a65476..b112e1ebd 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -129,23 +129,6 @@ public: using WakeupCallback = std::function<void( std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>; - /** - * Puts the specified guest thread to sleep until the returned event is signaled or until the - * specified timeout expires. - * @param reason Reason for pausing the thread, to be used for debugging purposes. - * @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback - * invoked with a Timeout reason. - * @param callback Callback to be invoked when the thread is resumed. This callback must write - * the entire command response once again, regardless of the state of it before this function - * was called. - * @param writable_event Event to use to wake up the thread. If unspecified, an event will be - * created. - * @returns Event that when signaled will resume the thread and call the callback function. 
- */ - std::shared_ptr<WritableEvent> SleepClientThread( - const std::string& reason, u64 timeout, WakeupCallback&& callback, - std::shared_ptr<WritableEvent> writable_event = nullptr); - /// Populates this context with data from the requesting process/thread. ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf); diff --git a/src/core/hle/kernel/k_affinity_mask.h b/src/core/hle/kernel/k_affinity_mask.h new file mode 100644 index 000000000..dd73781cd --- /dev/null +++ b/src/core/hle/kernel/k_affinity_mask.h @@ -0,0 +1,58 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/hardware_properties.h" + +namespace Kernel { + +class KAffinityMask { +public: + constexpr KAffinityMask() = default; + + [[nodiscard]] constexpr u64 GetAffinityMask() const { + return this->mask; + } + + constexpr void SetAffinityMask(u64 new_mask) { + ASSERT((new_mask & ~AllowedAffinityMask) == 0); + this->mask = new_mask; + } + + [[nodiscard]] constexpr bool GetAffinity(s32 core) const { + return this->mask & GetCoreBit(core); + } + + constexpr void SetAffinity(s32 core, bool set) { + ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); + + if (set) { + this->mask |= GetCoreBit(core); + } else { + this->mask &= ~GetCoreBit(core); + } + } + + constexpr void SetAll() { + this->mask = AllowedAffinityMask; + } + +private: + [[nodiscard]] static constexpr u64 GetCoreBit(s32 core) { + ASSERT(0 <= core && core < static_cast<s32>(Core::Hardware::NUM_CPU_CORES)); + return (1ULL << core); + } + + static constexpr u64 AllowedAffinityMask = (1ULL << Core::Hardware::NUM_CPU_CORES) - 1; + + u64 mask{}; +}; 
+ +} // namespace Kernel diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h new file mode 100644 index 000000000..99fb8fe93 --- /dev/null +++ b/src/core/hle/kernel/k_priority_queue.h @@ -0,0 +1,451 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include <array> +#include <concepts> + +#include "common/assert.h" +#include "common/bit_set.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/concepts.h" + +namespace Kernel { + +class Thread; + +template <typename T> +concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) { + { t.GetAffinityMask() } + ->Common::ConvertibleTo<u64>; + {t.SetAffinityMask(std::declval<u64>())}; + + { t.GetAffinity(std::declval<int32_t>()) } + ->std::same_as<bool>; + {t.SetAffinity(std::declval<int32_t>(), std::declval<bool>())}; + {t.SetAll()}; +}; + +template <typename T> +concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) { + {typename T::QueueEntry()}; + {(typename T::QueueEntry()).Initialize()}; + {(typename T::QueueEntry()).SetPrev(std::addressof(t))}; + {(typename T::QueueEntry()).SetNext(std::addressof(t))}; + { (typename T::QueueEntry()).GetNext() } + ->std::same_as<T*>; + { (typename T::QueueEntry()).GetPrev() } + ->std::same_as<T*>; + { t.GetPriorityQueueEntry(std::declval<s32>()) } + ->std::same_as<typename T::QueueEntry&>; + + {t.GetAffinityMask()}; + { typename std::remove_cvref<decltype(t.GetAffinityMask())>::type() } + ->KPriorityQueueAffinityMask; + + { t.GetActiveCore() } + ->Common::ConvertibleTo<s32>; + { t.GetPriority() } + ->Common::ConvertibleTo<s32>; +}; + +template <typename Member, size_t _NumCores, int LowestPriority, int 
HighestPriority> +requires KPriorityQueueMember<Member> class KPriorityQueue { +public: + using AffinityMaskType = typename std::remove_cv_t< + typename std::remove_reference<decltype(std::declval<Member>().GetAffinityMask())>::type>; + + static_assert(LowestPriority >= 0); + static_assert(HighestPriority >= 0); + static_assert(LowestPriority >= HighestPriority); + static constexpr size_t NumPriority = LowestPriority - HighestPriority + 1; + static constexpr size_t NumCores = _NumCores; + + static constexpr bool IsValidCore(s32 core) { + return 0 <= core && core < static_cast<s32>(NumCores); + } + + static constexpr bool IsValidPriority(s32 priority) { + return HighestPriority <= priority && priority <= LowestPriority + 1; + } + +private: + using Entry = typename Member::QueueEntry; + +public: + class KPerCoreQueue { + private: + std::array<Entry, NumCores> root{}; + + public: + constexpr KPerCoreQueue() { + for (auto& per_core_root : root) { + per_core_root.Initialize(); + } + } + + constexpr bool PushBack(s32 core, Member* member) { + // Get the entry associated with the member. + Entry& member_entry = member->GetPriorityQueueEntry(core); + + // Get the entry associated with the end of the queue. + Member* tail = this->root[core].GetPrev(); + Entry& tail_entry = + (tail != nullptr) ? tail->GetPriorityQueueEntry(core) : this->root[core]; + + // Link the entries. + member_entry.SetPrev(tail); + member_entry.SetNext(nullptr); + tail_entry.SetNext(member); + this->root[core].SetPrev(member); + + return tail == nullptr; + } + + constexpr bool PushFront(s32 core, Member* member) { + // Get the entry associated with the member. + Entry& member_entry = member->GetPriorityQueueEntry(core); + + // Get the entry associated with the front of the queue. + Member* head = this->root[core].GetNext(); + Entry& head_entry = + (head != nullptr) ? head->GetPriorityQueueEntry(core) : this->root[core]; + + // Link the entries. 
+ member_entry.SetPrev(nullptr); + member_entry.SetNext(head); + head_entry.SetPrev(member); + this->root[core].SetNext(member); + + return (head == nullptr); + } + + constexpr bool Remove(s32 core, Member* member) { + // Get the entry associated with the member. + Entry& member_entry = member->GetPriorityQueueEntry(core); + + // Get the entries associated with next and prev. + Member* prev = member_entry.GetPrev(); + Member* next = member_entry.GetNext(); + Entry& prev_entry = + (prev != nullptr) ? prev->GetPriorityQueueEntry(core) : this->root[core]; + Entry& next_entry = + (next != nullptr) ? next->GetPriorityQueueEntry(core) : this->root[core]; + + // Unlink. + prev_entry.SetNext(next); + next_entry.SetPrev(prev); + + return (this->GetFront(core) == nullptr); + } + + constexpr Member* GetFront(s32 core) const { + return this->root[core].GetNext(); + } + }; + + class KPriorityQueueImpl { + public: + constexpr KPriorityQueueImpl() = default; + + constexpr void PushBack(s32 priority, s32 core, Member* member) { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority > LowestPriority) { + return; + } + + if (this->queues[priority].PushBack(core, member)) { + this->available_priorities[core].SetBit(priority); + } + } + + constexpr void PushFront(s32 priority, s32 core, Member* member) { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority > LowestPriority) { + return; + } + + if (this->queues[priority].PushFront(core, member)) { + this->available_priorities[core].SetBit(priority); + } + } + + constexpr void Remove(s32 priority, s32 core, Member* member) { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority > LowestPriority) { + return; + } + + if (this->queues[priority].Remove(core, member)) { + this->available_priorities[core].ClearBit(priority); + } + } + + constexpr Member* GetFront(s32 core) const { + ASSERT(IsValidCore(core)); + + const s32 priority = + 
static_cast<s32>(this->available_priorities[core].CountLeadingZero()); + if (priority <= LowestPriority) { + return this->queues[priority].GetFront(core); + } else { + return nullptr; + } + } + + constexpr Member* GetFront(s32 priority, s32 core) const { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority <= LowestPriority) { + return this->queues[priority].GetFront(core); + } else { + return nullptr; + } + } + + constexpr Member* GetNext(s32 core, const Member* member) const { + ASSERT(IsValidCore(core)); + + Member* next = member->GetPriorityQueueEntry(core).GetNext(); + if (next == nullptr) { + const s32 priority = static_cast<s32>( + this->available_priorities[core].GetNextSet(member->GetPriority())); + if (priority <= LowestPriority) { + next = this->queues[priority].GetFront(core); + } + } + return next; + } + + constexpr void MoveToFront(s32 priority, s32 core, Member* member) { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority <= LowestPriority) { + this->queues[priority].Remove(core, member); + this->queues[priority].PushFront(core, member); + } + } + + constexpr Member* MoveToBack(s32 priority, s32 core, Member* member) { + ASSERT(IsValidCore(core)); + ASSERT(IsValidPriority(priority)); + + if (priority <= LowestPriority) { + this->queues[priority].Remove(core, member); + this->queues[priority].PushBack(core, member); + return this->queues[priority].GetFront(core); + } else { + return nullptr; + } + } + + private: + std::array<KPerCoreQueue, NumPriority> queues{}; + std::array<Common::BitSet64<NumPriority>, NumCores> available_priorities{}; + }; + +private: + KPriorityQueueImpl scheduled_queue; + KPriorityQueueImpl suggested_queue; + +private: + constexpr void ClearAffinityBit(u64& affinity, s32 core) { + affinity &= ~(u64(1) << core); + } + + constexpr s32 GetNextCore(u64& affinity) { + const s32 core = Common::CountTrailingZeroes64(affinity); + ClearAffinityBit(affinity, core); + return 
core; + } + + constexpr void PushBack(s32 priority, Member* member) { + ASSERT(IsValidPriority(priority)); + + // Push onto the scheduled queue for its core, if we can. + u64 affinity = member->GetAffinityMask().GetAffinityMask(); + if (const s32 core = member->GetActiveCore(); core >= 0) { + this->scheduled_queue.PushBack(priority, core, member); + ClearAffinityBit(affinity, core); + } + + // And suggest the thread for all other cores. + while (affinity) { + this->suggested_queue.PushBack(priority, GetNextCore(affinity), member); + } + } + + constexpr void PushFront(s32 priority, Member* member) { + ASSERT(IsValidPriority(priority)); + + // Push onto the scheduled queue for its core, if we can. + u64 affinity = member->GetAffinityMask().GetAffinityMask(); + if (const s32 core = member->GetActiveCore(); core >= 0) { + this->scheduled_queue.PushFront(priority, core, member); + ClearAffinityBit(affinity, core); + } + + // And suggest the thread for all other cores. + // Note: Nintendo pushes onto the back of the suggested queue, not the front. + while (affinity) { + this->suggested_queue.PushBack(priority, GetNextCore(affinity), member); + } + } + + constexpr void Remove(s32 priority, Member* member) { + ASSERT(IsValidPriority(priority)); + + // Remove from the scheduled queue for its core. + u64 affinity = member->GetAffinityMask().GetAffinityMask(); + if (const s32 core = member->GetActiveCore(); core >= 0) { + this->scheduled_queue.Remove(priority, core, member); + ClearAffinityBit(affinity, core); + } + + // Remove from the suggested queue for all other cores. + while (affinity) { + this->suggested_queue.Remove(priority, GetNextCore(affinity), member); + } + } + +public: + constexpr KPriorityQueue() = default; + + // Getters. 
+ constexpr Member* GetScheduledFront(s32 core) const { + return this->scheduled_queue.GetFront(core); + } + + constexpr Member* GetScheduledFront(s32 core, s32 priority) const { + return this->scheduled_queue.GetFront(priority, core); + } + + constexpr Member* GetSuggestedFront(s32 core) const { + return this->suggested_queue.GetFront(core); + } + + constexpr Member* GetSuggestedFront(s32 core, s32 priority) const { + return this->suggested_queue.GetFront(priority, core); + } + + constexpr Member* GetScheduledNext(s32 core, const Member* member) const { + return this->scheduled_queue.GetNext(core, member); + } + + constexpr Member* GetSuggestedNext(s32 core, const Member* member) const { + return this->suggested_queue.GetNext(core, member); + } + + constexpr Member* GetSamePriorityNext(s32 core, const Member* member) const { + return member->GetPriorityQueueEntry(core).GetNext(); + } + + // Mutators. + constexpr void PushBack(Member* member) { + this->PushBack(member->GetPriority(), member); + } + + constexpr void Remove(Member* member) { + this->Remove(member->GetPriority(), member); + } + + constexpr void MoveToScheduledFront(Member* member) { + this->scheduled_queue.MoveToFront(member->GetPriority(), member->GetActiveCore(), member); + } + + constexpr Thread* MoveToScheduledBack(Member* member) { + return this->scheduled_queue.MoveToBack(member->GetPriority(), member->GetActiveCore(), + member); + } + + // First class fancy operations. + constexpr void ChangePriority(s32 prev_priority, bool is_running, Member* member) { + ASSERT(IsValidPriority(prev_priority)); + + // Remove the member from the queues. + const s32 new_priority = member->GetPriority(); + this->Remove(prev_priority, member); + + // And enqueue. If the member is running, we want to keep it running. 
+ if (is_running) { + this->PushFront(new_priority, member); + } else { + this->PushBack(new_priority, member); + } + } + + constexpr void ChangeAffinityMask(s32 prev_core, const AffinityMaskType& prev_affinity, + Member* member) { + // Get the new information. + const s32 priority = member->GetPriority(); + const AffinityMaskType& new_affinity = member->GetAffinityMask(); + const s32 new_core = member->GetActiveCore(); + + // Remove the member from all queues it was in before. + for (s32 core = 0; core < static_cast<s32>(NumCores); core++) { + if (prev_affinity.GetAffinity(core)) { + if (core == prev_core) { + this->scheduled_queue.Remove(priority, core, member); + } else { + this->suggested_queue.Remove(priority, core, member); + } + } + } + + // And add the member to all queues it should be in now. + for (s32 core = 0; core < static_cast<s32>(NumCores); core++) { + if (new_affinity.GetAffinity(core)) { + if (core == new_core) { + this->scheduled_queue.PushBack(priority, core, member); + } else { + this->suggested_queue.PushBack(priority, core, member); + } + } + } + } + + constexpr void ChangeCore(s32 prev_core, Member* member, bool to_front = false) { + // Get the new information. + const s32 new_core = member->GetActiveCore(); + const s32 priority = member->GetPriority(); + + // We don't need to do anything if the core is the same. + if (prev_core != new_core) { + // Remove from the scheduled queue for the previous core. + if (prev_core >= 0) { + this->scheduled_queue.Remove(priority, prev_core, member); + } + + // Remove from the suggested queue and add to the scheduled queue for the new core. + if (new_core >= 0) { + this->suggested_queue.Remove(priority, new_core, member); + if (to_front) { + this->scheduled_queue.PushFront(priority, new_core, member); + } else { + this->scheduled_queue.PushBack(priority, new_core, member); + } + } + + // Add to the suggested queue for the previous core. 
+ if (prev_core >= 0) { + this->suggested_queue.PushBack(priority, prev_core, member); + } + } + } +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp new file mode 100644 index 000000000..c5fd82a6b --- /dev/null +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -0,0 +1,784 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/fiber.h" +#include "common/logging/log.h" +#include "core/arm/arm_interface.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/cpu_manager.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" + +namespace Kernel { + +static void IncrementScheduledCount(Kernel::Thread* thread) { + if (auto process = thread->GetOwnerProcess(); process) { + process->IncrementScheduledCount(); + } +} + +void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule, + Core::EmuThreadHandle global_thread) { + u32 current_core = global_thread.host_handle; + bool must_context_switch = global_thread.guest_handle != InvalidHandle && + (current_core < Core::Hardware::NUM_CPU_CORES); + + while (cores_pending_reschedule != 0) { + u32 core = Common::CountTrailingZeroes64(cores_pending_reschedule); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + if (!must_context_switch || core != current_core) { + auto& phys_core = kernel.PhysicalCore(core); + phys_core.Interrupt(); + } else { + must_context_switch = true; 
+ } + cores_pending_reschedule &= ~(1ULL << core); + } + if (must_context_switch) { + auto core_scheduler = kernel.CurrentScheduler(); + kernel.ExitSVCProfile(); + core_scheduler->RescheduleCurrentCore(); + kernel.EnterSVCProfile(); + } +} + +u64 KScheduler::UpdateHighestPriorityThread(Thread* highest_thread) { + std::scoped_lock lock{guard}; + if (Thread* prev_highest_thread = this->state.highest_priority_thread; + prev_highest_thread != highest_thread) { + if (prev_highest_thread != nullptr) { + IncrementScheduledCount(prev_highest_thread); + prev_highest_thread->SetLastScheduledTick(system.CoreTiming().GetCPUTicks()); + } + if (this->state.should_count_idle) { + if (highest_thread != nullptr) { + // if (Process* process = highest_thread->GetOwnerProcess(); process != nullptr) { + // process->SetRunningThread(this->core_id, highest_thread, + // this->state.idle_count); + //} + } else { + this->state.idle_count++; + } + } + + this->state.highest_priority_thread = highest_thread; + this->state.needs_scheduling = true; + return (1ULL << this->core_id); + } else { + return 0; + } +} + +u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // Clear that we need to update. + ClearSchedulerUpdateNeeded(kernel); + + u64 cores_needing_scheduling = 0, idle_cores = 0; + Thread* top_threads[Core::Hardware::NUM_CPU_CORES]; + auto& priority_queue = GetPriorityQueue(kernel); + + /// We want to go over all cores, finding the highest priority thread and determining if + /// scheduling is needed for that core. + for (size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { + Thread* top_thread = priority_queue.GetScheduledFront(static_cast<s32>(core_id)); + if (top_thread != nullptr) { + // If the thread has no waiters, we need to check if the process has a thread pinned. 
+ // TODO(bunnei): Implement thread pinning + } else { + idle_cores |= (1ULL << core_id); + } + + top_threads[core_id] = top_thread; + cores_needing_scheduling |= + kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]); + } + + // Idle cores are bad. We're going to try to migrate threads to each idle core in turn. + while (idle_cores != 0) { + u32 core_id = Common::CountTrailingZeroes64(idle_cores); + if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) { + s32 migration_candidates[Core::Hardware::NUM_CPU_CORES]; + size_t num_candidates = 0; + + // While we have a suggested thread, try to migrate it! + while (suggested != nullptr) { + // Check if the suggested thread is the top thread on its core. + const s32 suggested_core = suggested->GetActiveCore(); + if (Thread* top_thread = + (suggested_core >= 0) ? top_threads[suggested_core] : nullptr; + top_thread != suggested) { + // Make sure we're not dealing with threads too high priority for migration. + if (top_thread != nullptr && + top_thread->GetPriority() < HighestCoreMigrationAllowedPriority) { + break; + } + + // The suggested thread isn't bound to its core, so we can migrate it! + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(suggested_core, suggested); + + top_threads[core_id] = suggested; + cores_needing_scheduling |= + kernel.Scheduler(core_id).UpdateHighestPriorityThread(top_threads[core_id]); + break; + } + + // Note this core as a candidate for migration. + ASSERT(num_candidates < Core::Hardware::NUM_CPU_CORES); + migration_candidates[num_candidates++] = suggested_core; + suggested = priority_queue.GetSuggestedNext(core_id, suggested); + } + + // If suggested is nullptr, we failed to migrate a specific thread. So let's try all our + // candidate cores' top threads. + if (suggested == nullptr) { + for (size_t i = 0; i < num_candidates; i++) { + // Check if there's some other thread that can run on the candidate core. 
+ const s32 candidate_core = migration_candidates[i]; + suggested = top_threads[candidate_core]; + if (Thread* next_on_candidate_core = + priority_queue.GetScheduledNext(candidate_core, suggested); + next_on_candidate_core != nullptr) { + // The candidate core can run some other thread! We'll migrate its current + // top thread to us. + top_threads[candidate_core] = next_on_candidate_core; + cores_needing_scheduling |= + kernel.Scheduler(candidate_core) + .UpdateHighestPriorityThread(top_threads[candidate_core]); + + // Perform the migration. + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(candidate_core, suggested); + + top_threads[core_id] = suggested; + cores_needing_scheduling |= + kernel.Scheduler(core_id).UpdateHighestPriorityThread( + top_threads[core_id]); + break; + } + } + } + } + + idle_cores &= ~(1ULL << core_id); + } + + return cores_needing_scheduling; +} + +void KScheduler::OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // Check if the state has changed, because if it hasn't there's nothing to do. + const auto cur_state = thread->scheduling_state; + if (cur_state == old_state) { + return; + } + + // Update the priority queues. + if (old_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // If we were previously runnable, then we're not runnable now, and we should remove. + GetPriorityQueue(kernel).Remove(thread); + IncrementScheduledCount(thread); + SetSchedulerUpdateNeeded(kernel); + } else if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // If we're now runnable, then we weren't previously, and we should add. 
+ GetPriorityQueue(kernel).PushBack(thread); + IncrementScheduledCount(thread); + SetSchedulerUpdateNeeded(kernel); + } +} + +void KScheduler::OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread, + u32 old_priority) { + + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // If the thread is runnable, we want to change its priority in the queue. + if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + GetPriorityQueue(kernel).ChangePriority( + old_priority, thread == kernel.CurrentScheduler()->GetCurrentThread(), thread); + IncrementScheduledCount(thread); + SetSchedulerUpdateNeeded(kernel); + } +} + +void KScheduler::OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread, + const KAffinityMask& old_affinity, s32 old_core) { + ASSERT(kernel.GlobalSchedulerContext().IsLocked()); + + // If the thread is runnable, we want to change its affinity in the queue. + if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + GetPriorityQueue(kernel).ChangeAffinityMask(old_core, old_affinity, thread); + IncrementScheduledCount(thread); + SetSchedulerUpdateNeeded(kernel); + } +} + +void KScheduler::RotateScheduledQueue(s32 core_id, s32 priority) { + ASSERT(system.GlobalSchedulerContext().IsLocked()); + + // Get a reference to the priority queue. + auto& kernel = system.Kernel(); + auto& priority_queue = GetPriorityQueue(kernel); + + // Rotate the front of the queue to the end. + Thread* top_thread = priority_queue.GetScheduledFront(core_id, priority); + Thread* next_thread = nullptr; + if (top_thread != nullptr) { + next_thread = priority_queue.MoveToScheduledBack(top_thread); + if (next_thread != top_thread) { + IncrementScheduledCount(top_thread); + IncrementScheduledCount(next_thread); + } + } + + // While we have a suggested thread, try to migrate it! 
+ { + Thread* suggested = priority_queue.GetSuggestedFront(core_id, priority); + while (suggested != nullptr) { + // Check if the suggested thread is the top thread on its core. + const s32 suggested_core = suggested->GetActiveCore(); + if (Thread* top_on_suggested_core = + (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core) + : nullptr; + top_on_suggested_core != suggested) { + // If the next thread is a new thread that has been waiting longer than our + // suggestion, we prefer it to our suggestion. + if (top_thread != next_thread && next_thread != nullptr && + next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick()) { + suggested = nullptr; + break; + } + + // If we're allowed to do a migration, do one. + // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the suggestion + // to the front of the queue. + if (top_on_suggested_core == nullptr || + top_on_suggested_core->GetPriority() >= HighestCoreMigrationAllowedPriority) { + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(suggested_core, suggested, true); + IncrementScheduledCount(suggested); + break; + } + } + + // Get the next suggestion. + suggested = priority_queue.GetSamePriorityNext(core_id, suggested); + } + } + + // Now that we might have migrated a thread with the same priority, check if we can do better. + + { + Thread* best_thread = priority_queue.GetScheduledFront(core_id); + if (best_thread == GetCurrentThread()) { + best_thread = priority_queue.GetScheduledNext(core_id, best_thread); + } + + // If the best thread we can choose has a priority the same or worse than ours, try to + // migrate a higher priority thread. + if (best_thread != nullptr && best_thread->GetPriority() >= static_cast<u32>(priority)) { + Thread* suggested = priority_queue.GetSuggestedFront(core_id); + while (suggested != nullptr) { + // If the suggestion's priority is the same as ours, don't bother. 
+ if (suggested->GetPriority() >= best_thread->GetPriority()) { + break; + } + + // Check if the suggested thread is the top thread on its core. + const s32 suggested_core = suggested->GetActiveCore(); + if (Thread* top_on_suggested_core = + (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core) + : nullptr; + top_on_suggested_core != suggested) { + // If we're allowed to do a migration, do one. + // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the + // suggestion to the front of the queue. + if (top_on_suggested_core == nullptr || + top_on_suggested_core->GetPriority() >= + HighestCoreMigrationAllowedPriority) { + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(suggested_core, suggested, true); + IncrementScheduledCount(suggested); + break; + } + } + + // Get the next suggestion. + suggested = priority_queue.GetSuggestedNext(core_id, suggested); + } + } + } + + // After a rotation, we need a scheduler update. + SetSchedulerUpdateNeeded(kernel); +} + +bool KScheduler::CanSchedule(KernelCore& kernel) { + return kernel.CurrentScheduler()->GetCurrentThread()->GetDisableDispatchCount() <= 1; +} + +bool KScheduler::IsSchedulerUpdateNeeded(const KernelCore& kernel) { + return kernel.GlobalSchedulerContext().scheduler_update_needed.load(std::memory_order_acquire); +} + +void KScheduler::SetSchedulerUpdateNeeded(KernelCore& kernel) { + kernel.GlobalSchedulerContext().scheduler_update_needed.store(true, std::memory_order_release); +} + +void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) { + kernel.GlobalSchedulerContext().scheduler_update_needed.store(false, std::memory_order_release); +} + +void KScheduler::DisableScheduling(KernelCore& kernel) { + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0); + scheduler->GetCurrentThread()->DisableDispatch(); + } +} + +void KScheduler::EnableScheduling(KernelCore& kernel, u64 
cores_needing_scheduling, + Core::EmuThreadHandle global_thread) { + if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { + scheduler->GetCurrentThread()->EnableDispatch(); + } + RescheduleCores(kernel, cores_needing_scheduling, global_thread); +} + +u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) { + if (IsSchedulerUpdateNeeded(kernel)) { + return UpdateHighestPriorityThreadsImpl(kernel); + } else { + return 0; + } +} + +KSchedulerPriorityQueue& KScheduler::GetPriorityQueue(KernelCore& kernel) { + return kernel.GlobalSchedulerContext().priority_queue; +} + +void KScheduler::YieldWithoutCoreMigration() { + auto& kernel = system.Kernel(); + + // Validate preconditions. + ASSERT(CanSchedule(kernel)); + ASSERT(kernel.CurrentProcess() != nullptr); + + // Get the current thread and process. + Thread& cur_thread = *GetCurrentThread(); + Process& cur_process = *kernel.CurrentProcess(); + + // If the thread's yield count matches, there's nothing for us to do. + if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) { + return; + } + + // Get a reference to the priority queue. + auto& priority_queue = GetPriorityQueue(kernel); + + // Perform the yield. + { + KScopedSchedulerLock lock(kernel); + + const auto cur_state = cur_thread.scheduling_state; + if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // Put the current thread at the back of the queue. + Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread)); + IncrementScheduledCount(std::addressof(cur_thread)); + + // If the next thread is different, we have an update to perform. + if (next_thread != std::addressof(cur_thread)) { + SetSchedulerUpdateNeeded(kernel); + } else { + // Otherwise, set the thread's yield count so that we won't waste work until the + // process is scheduled again. 
+ cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount()); + } + } + } +} + +void KScheduler::YieldWithCoreMigration() { + auto& kernel = system.Kernel(); + + // Validate preconditions. + ASSERT(CanSchedule(kernel)); + ASSERT(kernel.CurrentProcess() != nullptr); + + // Get the current thread and process. + Thread& cur_thread = *GetCurrentThread(); + Process& cur_process = *kernel.CurrentProcess(); + + // If the thread's yield count matches, there's nothing for us to do. + if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) { + return; + } + + // Get a reference to the priority queue. + auto& priority_queue = GetPriorityQueue(kernel); + + // Perform the yield. + { + KScopedSchedulerLock lock(kernel); + + const auto cur_state = cur_thread.scheduling_state; + if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // Get the current active core. + const s32 core_id = cur_thread.GetActiveCore(); + + // Put the current thread at the back of the queue. + Thread* next_thread = priority_queue.MoveToScheduledBack(std::addressof(cur_thread)); + IncrementScheduledCount(std::addressof(cur_thread)); + + // While we have a suggested thread, try to migrate it! + bool recheck = false; + Thread* suggested = priority_queue.GetSuggestedFront(core_id); + while (suggested != nullptr) { + // Check if the suggested thread is the thread running on its core. + const s32 suggested_core = suggested->GetActiveCore(); + + if (Thread* running_on_suggested_core = + (suggested_core >= 0) + ? kernel.Scheduler(suggested_core).state.highest_priority_thread + : nullptr; + running_on_suggested_core != suggested) { + // If the current thread's priority is higher than our suggestion's we prefer + // the next thread to the suggestion. We also prefer the next thread when the + // current thread's priority is equal to the suggestions, but the next thread + // has been waiting longer. 
+ if ((suggested->GetPriority() > cur_thread.GetPriority()) || + (suggested->GetPriority() == cur_thread.GetPriority() && + next_thread != std::addressof(cur_thread) && + next_thread->GetLastScheduledTick() < suggested->GetLastScheduledTick())) { + suggested = nullptr; + break; + } + + // If we're allowed to do a migration, do one. + // NOTE: Unlike migrations in UpdateHighestPriorityThread, this moves the + // suggestion to the front of the queue. + if (running_on_suggested_core == nullptr || + running_on_suggested_core->GetPriority() >= + HighestCoreMigrationAllowedPriority) { + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(suggested_core, suggested, true); + IncrementScheduledCount(suggested); + break; + } else { + // We couldn't perform a migration, but we should check again on a future + // yield. + recheck = true; + } + } + + // Get the next suggestion. + suggested = priority_queue.GetSuggestedNext(core_id, suggested); + } + + // If we still have a suggestion or the next thread is different, we have an update to + // perform. + if (suggested != nullptr || next_thread != std::addressof(cur_thread)) { + SetSchedulerUpdateNeeded(kernel); + } else if (!recheck) { + // Otherwise if we don't need to re-check, set the thread's yield count so that we + // won't waste work until the process is scheduled again. + cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount()); + } + } + } +} + +void KScheduler::YieldToAnyThread() { + auto& kernel = system.Kernel(); + + // Validate preconditions. + ASSERT(CanSchedule(kernel)); + ASSERT(kernel.CurrentProcess() != nullptr); + + // Get the current thread and process. + Thread& cur_thread = *GetCurrentThread(); + Process& cur_process = *kernel.CurrentProcess(); + + // If the thread's yield count matches, there's nothing for us to do. + if (cur_thread.GetYieldScheduleCount() == cur_process.GetScheduledCount()) { + return; + } + + // Get a reference to the priority queue. 
+ auto& priority_queue = GetPriorityQueue(kernel); + + // Perform the yield. + { + KScopedSchedulerLock lock(kernel); + + const auto cur_state = cur_thread.scheduling_state; + if (cur_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { + // Get the current active core. + const s32 core_id = cur_thread.GetActiveCore(); + + // Migrate the current thread to core -1. + cur_thread.SetActiveCore(-1); + priority_queue.ChangeCore(core_id, std::addressof(cur_thread)); + IncrementScheduledCount(std::addressof(cur_thread)); + + // If there's nothing scheduled, we can try to perform a migration. + if (priority_queue.GetScheduledFront(core_id) == nullptr) { + // While we have a suggested thread, try to migrate it! + Thread* suggested = priority_queue.GetSuggestedFront(core_id); + while (suggested != nullptr) { + // Check if the suggested thread is the top thread on its core. + const s32 suggested_core = suggested->GetActiveCore(); + if (Thread* top_on_suggested_core = + (suggested_core >= 0) ? priority_queue.GetScheduledFront(suggested_core) + : nullptr; + top_on_suggested_core != suggested) { + // If we're allowed to do a migration, do one. + if (top_on_suggested_core == nullptr || + top_on_suggested_core->GetPriority() >= + HighestCoreMigrationAllowedPriority) { + suggested->SetActiveCore(core_id); + priority_queue.ChangeCore(suggested_core, suggested); + IncrementScheduledCount(suggested); + } + + // Regardless of whether we migrated, we had a candidate, so we're done. + break; + } + + // Get the next suggestion. + suggested = priority_queue.GetSuggestedNext(core_id, suggested); + } + + // If the suggestion is different from the current thread, we need to perform an + // update. + if (suggested != std::addressof(cur_thread)) { + SetSchedulerUpdateNeeded(kernel); + } else { + // Otherwise, set the thread's yield count so that we won't waste work until the + // process is scheduled again. 
+ cur_thread.SetYieldScheduleCount(cur_process.GetScheduledCount()); + } + } else { + // Otherwise, we have an update to perform. + SetSchedulerUpdateNeeded(kernel); + } + } + } +} + +KScheduler::KScheduler(Core::System& system, std::size_t core_id) + : system(system), core_id(core_id) { + switch_fiber = std::make_shared<Common::Fiber>(OnSwitch, this); + this->state.needs_scheduling = true; + this->state.interrupt_task_thread_runnable = false; + this->state.should_count_idle = false; + this->state.idle_count = 0; + this->state.idle_thread_stack = nullptr; + this->state.highest_priority_thread = nullptr; +} + +KScheduler::~KScheduler() = default; + +Thread* KScheduler::GetCurrentThread() const { + if (current_thread) { + return current_thread; + } + return idle_thread; +} + +u64 KScheduler::GetLastContextSwitchTicks() const { + return last_context_switch_time; +} + +void KScheduler::RescheduleCurrentCore() { + ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1); + + auto& phys_core = system.Kernel().PhysicalCore(core_id); + if (phys_core.IsInterrupted()) { + phys_core.ClearInterrupt(); + } + guard.lock(); + if (this->state.needs_scheduling) { + Schedule(); + } else { + guard.unlock(); + } +} + +void KScheduler::OnThreadStart() { + SwitchContextStep2(); +} + +void KScheduler::Unload(Thread* thread) { + if (thread) { + thread->SetIsRunning(false); + if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) { + system.ArmInterface(core_id).ExceptionalExit(); + thread->SetContinuousOnSVC(false); + } + if (!thread->IsHLEThread() && !thread->HasExited()) { + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.SaveContext(thread->GetContext32()); + cpu_core.SaveContext(thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. 
+ thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } + thread->context_guard.unlock(); + } +} + +void KScheduler::Reload(Thread* thread) { + if (thread) { + ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, + "Thread must be runnable."); + + // Cancel any outstanding wakeup events for this thread + thread->SetIsRunning(true); + thread->SetWasRunning(false); + + auto* const thread_owner_process = thread->GetOwnerProcess(); + if (thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); + } + if (!thread->IsHLEThread()) { + Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); + cpu_core.LoadContext(thread->GetContext32()); + cpu_core.LoadContext(thread->GetContext64()); + cpu_core.SetTlsAddress(thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); + cpu_core.ClearExclusiveState(); + } + } +} + +void KScheduler::SwitchContextStep2() { + // Load context of new thread + Reload(current_thread); + + RescheduleCurrentCore(); +} + +void KScheduler::ScheduleImpl() { + Thread* previous_thread = current_thread; + current_thread = state.highest_priority_thread; + + this->state.needs_scheduling = false; + + if (current_thread == previous_thread) { + guard.unlock(); + return; + } + + Process* const previous_process = system.Kernel().CurrentProcess(); + + UpdateLastContextSwitchTime(previous_thread, previous_process); + + // Save context for previous thread + Unload(previous_thread); + + std::shared_ptr<Common::Fiber>* old_context; + if (previous_thread != nullptr) { + old_context = &previous_thread->GetHostContext(); + } else { + old_context = &idle_thread->GetHostContext(); + } + guard.unlock(); + + Common::Fiber::YieldTo(*old_context, switch_fiber); + /// When a thread wakes up, the scheduler may have changed to other in another core. 
+ auto& next_scheduler = *system.Kernel().CurrentScheduler(); + next_scheduler.SwitchContextStep2(); +} + +void KScheduler::OnSwitch(void* this_scheduler) { + KScheduler* sched = static_cast<KScheduler*>(this_scheduler); + sched->SwitchToCurrent(); +} + +void KScheduler::SwitchToCurrent() { + while (true) { + { + std::scoped_lock lock{guard}; + current_thread = state.highest_priority_thread; + this->state.needs_scheduling = false; + } + const auto is_switch_pending = [this] { + std::scoped_lock lock{guard}; + return state.needs_scheduling.load(std::memory_order_relaxed); + }; + do { + if (current_thread != nullptr && !current_thread->IsHLEThread()) { + current_thread->context_guard.lock(); + if (!current_thread->IsRunnable()) { + current_thread->context_guard.unlock(); + break; + } + if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) { + current_thread->context_guard.unlock(); + break; + } + } + std::shared_ptr<Common::Fiber>* next_context; + if (current_thread != nullptr) { + next_context = ¤t_thread->GetHostContext(); + } else { + next_context = &idle_thread->GetHostContext(); + } + Common::Fiber::YieldTo(switch_fiber, *next_context); + } while (!is_switch_pending()); + } +} + +void KScheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { + const u64 prev_switch_ticks = last_context_switch_time; + const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks(); + const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; + + if (thread != nullptr) { + thread->UpdateCPUTimeTicks(update_ticks); + } + + if (process != nullptr) { + process->UpdateCPUTimeTicks(update_ticks); + } + + last_context_switch_time = most_recent_switch_ticks; +} + +void KScheduler::Initialize() { + std::string name = "Idle Thread Id:" + std::to_string(core_id); + std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + ThreadType type 
= static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE); + auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0, + nullptr, std::move(init_func), init_func_parameter); + idle_thread = thread_res.Unwrap().get(); + + { + KScopedSchedulerLock lock{system.Kernel()}; + idle_thread->SetStatus(ThreadStatus::Ready); + } +} + +KScopedSchedulerLock::KScopedSchedulerLock(KernelCore& kernel) + : KScopedLock(kernel.GlobalSchedulerContext().SchedulerLock()) {} + +KScopedSchedulerLock::~KScopedSchedulerLock() = default; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h new file mode 100644 index 000000000..e84abc84c --- /dev/null +++ b/src/core/hle/kernel/k_scheduler.h @@ -0,0 +1,201 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include <atomic> + +#include "common/common_types.h" +#include "common/spin_lock.h" +#include "core/hle/kernel/global_scheduler_context.h" +#include "core/hle/kernel/k_priority_queue.h" +#include "core/hle/kernel/k_scheduler_lock.h" +#include "core/hle/kernel/k_scoped_lock.h" + +namespace Common { +class Fiber; +} + +namespace Core { +class System; +} + +namespace Kernel { + +class KernelCore; +class Process; +class SchedulerLock; +class Thread; + +class KScheduler final { +public: + explicit KScheduler(Core::System& system, std::size_t core_id); + ~KScheduler(); + + /// Reschedules to the next available thread (call after current thread is suspended) + void RescheduleCurrentCore(); + + /// Reschedules cores pending reschedule, to be called on EnableScheduling. 
+ static void RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule, + Core::EmuThreadHandle global_thread); + + /// The next two are for SingleCore Only. + /// Unload current thread before preempting core. + void Unload(Thread* thread); + + /// Reload current thread after core preemption. + void Reload(Thread* thread); + + /// Gets the current running thread + [[nodiscard]] Thread* GetCurrentThread() const; + + /// Gets the timestamp for the last context switch in ticks. + [[nodiscard]] u64 GetLastContextSwitchTicks() const; + + [[nodiscard]] bool ContextSwitchPending() const { + return state.needs_scheduling.load(std::memory_order_relaxed); + } + + void Initialize(); + + void OnThreadStart(); + + [[nodiscard]] std::shared_ptr<Common::Fiber>& ControlContext() { + return switch_fiber; + } + + [[nodiscard]] const std::shared_ptr<Common::Fiber>& ControlContext() const { + return switch_fiber; + } + + [[nodiscard]] u64 UpdateHighestPriorityThread(Thread* highest_thread); + + /** + * Takes a thread and moves it to the back of the it's priority list. + * + * @note This operation can be redundant and no scheduling is changed if marked as so. + */ + void YieldWithoutCoreMigration(); + + /** + * Takes a thread and moves it to the back of the it's priority list. + * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or + * a better priority than the next thread in the core. + * + * @note This operation can be redundant and no scheduling is changed if marked as so. + */ + void YieldWithCoreMigration(); + + /** + * Takes a thread and moves it out of the scheduling queue. + * and into the suggested queue. If no thread can be scheduled afterwards in that core, + * a suggested thread is obtained instead. + * + * @note This operation can be redundant and no scheduling is changed if marked as so. + */ + void YieldToAnyThread(); + + /// Notify the scheduler a thread's status has changed. 
+ static void OnThreadStateChanged(KernelCore& kernel, Thread* thread, u32 old_state); + + /// Notify the scheduler a thread's priority has changed. + static void OnThreadPriorityChanged(KernelCore& kernel, Thread* thread, Thread* current_thread, + u32 old_priority); + + /// Notify the scheduler a thread's core and/or affinity mask has changed. + static void OnThreadAffinityMaskChanged(KernelCore& kernel, Thread* thread, + const KAffinityMask& old_affinity, s32 old_core); + + static bool CanSchedule(KernelCore& kernel); + static bool IsSchedulerUpdateNeeded(const KernelCore& kernel); + static void SetSchedulerUpdateNeeded(KernelCore& kernel); + static void ClearSchedulerUpdateNeeded(KernelCore& kernel); + static void DisableScheduling(KernelCore& kernel); + static void EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling, + Core::EmuThreadHandle global_thread); + [[nodiscard]] static u64 UpdateHighestPriorityThreads(KernelCore& kernel); + +private: + friend class GlobalSchedulerContext; + + /** + * Takes care of selecting the new scheduled threads in three steps: + * + * 1. First a thread is selected from the top of the priority queue. If no thread + * is obtained then we move to step two, else we are done. + * + * 2. Second we try to get a suggested thread that's not assigned to any core or + * that is not the top thread in that core. + * + * 3. Third is no suggested thread is found, we do a second pass and pick a running + * thread in another core and swap it with its current thread. + * + * returns the cores needing scheduling. 
+ */ + [[nodiscard]] static u64 UpdateHighestPriorityThreadsImpl(KernelCore& kernel); + + [[nodiscard]] static KSchedulerPriorityQueue& GetPriorityQueue(KernelCore& kernel); + + void RotateScheduledQueue(s32 core_id, s32 priority); + + void Schedule() { + ASSERT(GetCurrentThread()->GetDisableDispatchCount() == 1); + this->ScheduleImpl(); + } + + /// Switches the CPU's active thread context to that of the specified thread + void ScheduleImpl(); + + /// When a thread wakes up, it must run this through it's new scheduler + void SwitchContextStep2(); + + /** + * Called on every context switch to update the internal timestamp + * This also updates the running time ticks for the given thread and + * process using the following difference: + * + * ticks += most_recent_ticks - last_context_switch_ticks + * + * The internal tick timestamp for the scheduler is simply the + * most recent tick count retrieved. No special arithmetic is + * applied to it. + */ + void UpdateLastContextSwitchTime(Thread* thread, Process* process); + + static void OnSwitch(void* this_scheduler); + void SwitchToCurrent(); + + Thread* current_thread{}; + Thread* idle_thread{}; + + std::shared_ptr<Common::Fiber> switch_fiber{}; + + struct SchedulingState { + std::atomic<bool> needs_scheduling; + bool interrupt_task_thread_runnable{}; + bool should_count_idle{}; + u64 idle_count{}; + Thread* highest_priority_thread{}; + void* idle_thread_stack{}; + }; + + SchedulingState state; + + Core::System& system; + u64 last_context_switch_time{}; + const std::size_t core_id; + + Common::SpinLock guard{}; +}; + +class KScopedSchedulerLock : KScopedLock<GlobalSchedulerContext::LockType> { +public: + explicit KScopedSchedulerLock(KernelCore& kernel); + ~KScopedSchedulerLock(); +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h new file mode 100644 index 000000000..2f1c1f691 --- /dev/null +++ b/src/core/hle/kernel/k_scheduler_lock.h @@ -0,0 +1,75 
@@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include "common/assert.h" +#include "common/spin_lock.h" +#include "core/hardware_properties.h" +#include "core/hle/kernel/kernel.h" + +namespace Kernel { + +class KernelCore; + +template <typename SchedulerType> +class KAbstractSchedulerLock { +public: + explicit KAbstractSchedulerLock(KernelCore& kernel) : kernel{kernel} {} + + bool IsLockedByCurrentThread() const { + return this->owner_thread == kernel.GetCurrentEmuThreadID(); + } + + void Lock() { + if (this->IsLockedByCurrentThread()) { + // If we already own the lock, we can just increment the count. + ASSERT(this->lock_count > 0); + this->lock_count++; + } else { + // Otherwise, we want to disable scheduling and acquire the spinlock. + SchedulerType::DisableScheduling(kernel); + this->spin_lock.lock(); + + // For debug, ensure that our state is valid. + ASSERT(this->lock_count == 0); + ASSERT(this->owner_thread == Core::EmuThreadHandle::InvalidHandle()); + + // Increment count, take ownership. + this->lock_count = 1; + this->owner_thread = kernel.GetCurrentEmuThreadID(); + } + } + + void Unlock() { + ASSERT(this->IsLockedByCurrentThread()); + ASSERT(this->lock_count > 0); + + // Release an instance of the lock. + if ((--this->lock_count) == 0) { + // We're no longer going to hold the lock. Take note of what cores need scheduling. + const u64 cores_needing_scheduling = + SchedulerType::UpdateHighestPriorityThreads(kernel); + Core::EmuThreadHandle leaving_thread = owner_thread; + + // Note that we no longer hold the lock, and unlock the spinlock. + this->owner_thread = Core::EmuThreadHandle::InvalidHandle(); + this->spin_lock.unlock(); + + // Enable scheduling, and perform a rescheduling operation. 
+ SchedulerType::EnableScheduling(kernel, cores_needing_scheduling, leaving_thread); + } + } + +private: + KernelCore& kernel; + Common::SpinLock spin_lock{}; + s32 lock_count{}; + Core::EmuThreadHandle owner_thread{Core::EmuThreadHandle::InvalidHandle()}; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scoped_lock.h b/src/core/hle/kernel/k_scoped_lock.h new file mode 100644 index 000000000..d7cc557b2 --- /dev/null +++ b/src/core/hle/kernel/k_scoped_lock.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include <concepts> +#include <memory> +#include <type_traits> + +#include "common/common_types.h" + +namespace Kernel { + +/// Satisfied by non-reference types that expose void Lock() and void Unlock(). +template <typename T> +concept KLockable = !std::is_reference_v<T> && requires(T & t) { + { t.Lock() } + ->std::same_as<void>; + { t.Unlock() } + ->std::same_as<void>; +}; + +/// Scoped guard that calls Lock() on construction and Unlock() on destruction. +template <typename T> +requires KLockable<T> class KScopedLock { +public: + explicit KScopedLock(T* l) : lock_ptr(l) { + this->lock_ptr->Lock(); + } + explicit KScopedLock(T& l) : KScopedLock(std::addressof(l)) { /* ... */ + } + ~KScopedLock() { + this->lock_ptr->Unlock(); + } + + KScopedLock(const KScopedLock&) = delete; + KScopedLock(KScopedLock&&) = delete; + +private: + T* lock_ptr; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h new file mode 100644 index 000000000..2bb3817fa --- /dev/null +++ b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch.
Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include "common/common_types.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" + +namespace Kernel { + +class KScopedSchedulerLockAndSleep { +public: + explicit KScopedSchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* t, + s64 timeout) + : kernel(kernel), event_handle(event_handle), thread(t), timeout_tick(timeout) { + event_handle = InvalidHandle; + + // Lock the scheduler. + kernel.GlobalSchedulerContext().scheduler_lock.Lock(); + } + + ~KScopedSchedulerLockAndSleep() { + // Register the sleep. + if (this->timeout_tick > 0) { + kernel.TimeManager().ScheduleTimeEvent(event_handle, this->thread, this->timeout_tick); + } + + // Unlock the scheduler. + kernel.GlobalSchedulerContext().scheduler_lock.Unlock(); + } + + void CancelSleep() { + this->timeout_tick = 0; + } + +private: + KernelCore& kernel; + Handle& event_handle; + Thread* thread{}; + s64 timeout_tick{}; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 929db696d..e8ece8164 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -8,13 +8,14 @@ #include <functional> #include <memory> #include <thread> -#include <unordered_map> +#include <unordered_set> #include <utility> #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "common/thread.h" +#include "common/thread_worker.h" #include "core/arm/arm_interface.h" #include "core/arm/cpu_interrupt_handler.h" #include "core/arm/exclusive_monitor.h" @@ -27,6 +28,7 @@ #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/memory/memory_layout.h" #include 
"core/hle/kernel/memory/memory_manager.h" @@ -34,7 +36,7 @@ #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" -#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/service_thread.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/thread.h" @@ -49,17 +51,20 @@ namespace Kernel { struct KernelCore::Impl { explicit Impl(Core::System& system, KernelCore& kernel) - : global_scheduler{kernel}, synchronization{system}, time_manager{system}, - global_handle_table{kernel}, system{system} {} + : synchronization{system}, time_manager{system}, global_handle_table{kernel}, system{ + system} {} void SetMulticore(bool is_multicore) { this->is_multicore = is_multicore; } void Initialize(KernelCore& kernel) { - Shutdown(); RegisterHostThread(); + global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); + service_thread_manager = + std::make_unique<Common::ThreadWorker>(1, "yuzu:ServiceThreadManager"); + InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); InitializeMemoryLayout(); @@ -75,6 +80,12 @@ struct KernelCore::Impl { } void Shutdown() { + process_list.clear(); + + // Ensures all service threads gracefully shutdown + service_thread_manager.reset(); + service_threads.clear(); + next_object_id = 0; next_kernel_process_id = Process::InitialKIPIDMin; next_user_process_id = Process::ProcessIDMin; @@ -86,42 +97,29 @@ struct KernelCore::Impl { } } - for (std::size_t i = 0; i < cores.size(); i++) { - cores[i].Shutdown(); - schedulers[i].reset(); - } cores.clear(); - process_list.clear(); current_process = nullptr; system_resource_limit = nullptr; global_handle_table.Clear(); - preemption_event = nullptr; - global_scheduler.Shutdown(); + preemption_event = nullptr; named_ports.clear(); - for (auto& core : cores) { - core.Shutdown(); - } - cores.clear(); - exclusive_monitor.reset(); - 
num_host_threads = 0; - std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), - std::thread::id{}); - std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0); + // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others + next_host_thread_id = Core::Hardware::NUM_CPU_CORES; } void InitializePhysicalCores() { exclusive_monitor = Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i); + schedulers[i] = std::make_unique<Kernel::KScheduler>(system, i); cores.emplace_back(i, system, *schedulers[i], interrupts); } } @@ -154,8 +152,8 @@ struct KernelCore::Impl { preemption_event = Core::Timing::CreateEvent( "PreemptionCallback", [this, &kernel](std::uintptr_t, std::chrono::nanoseconds) { { - SchedulerLock lock(kernel); - global_scheduler.PreemptThreads(); + KScopedSchedulerLock lock(kernel); + global_scheduler_context->PreemptThreads(); } const auto time_interval = std::chrono::nanoseconds{ Core::Timing::msToCycles(std::chrono::milliseconds(10))}; @@ -194,58 +192,52 @@ struct KernelCore::Impl { } } + /// Creates a new host thread ID, should only be called by GetHostThreadId + u32 AllocateHostThreadId(std::optional<std::size_t> core_id) { + if (core_id) { + // The first four slots are reserved for CPU core threads + ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES); + return static_cast<u32>(*core_id); + } else { + return next_host_thread_id++; + } + } + + /// Gets the host thread ID for the caller, allocating a new one if this is the first time + u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) { + const thread_local auto host_thread_id{AllocateHostThreadId(core_id)}; + return host_thread_id; + } + + /// Registers a CPU core thread by allocating a host thread ID for it void RegisterCoreThread(std::size_t core_id) { - const
std::thread::id this_id = std::this_thread::get_id(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + const auto this_id = GetHostThreadId(core_id); if (!is_multicore) { single_core_thread_id = this_id; } - const auto end = - register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); - const auto it = std::find(register_host_thread_keys.begin(), end, this_id); - ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); - ASSERT(it == end); - InsertHostThread(static_cast<u32>(core_id)); } + /// Registers a new host thread by allocating a host thread ID for it void RegisterHostThread() { - const std::thread::id this_id = std::this_thread::get_id(); - const auto end = - register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); - const auto it = std::find(register_host_thread_keys.begin(), end, this_id); - if (it == end) { - InsertHostThread(registered_thread_ids++); - } + [[maybe_unused]] const auto this_id = GetHostThreadId(); } - void InsertHostThread(u32 value) { - const size_t index = num_host_threads++; - ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads"); - register_host_thread_values[index] = value; - register_host_thread_keys[index] = std::this_thread::get_id(); - } - - [[nodiscard]] u32 GetCurrentHostThreadID() const { - const std::thread::id this_id = std::this_thread::get_id(); + [[nodiscard]] u32 GetCurrentHostThreadID() { + const auto this_id = GetHostThreadId(); if (!is_multicore && single_core_thread_id == this_id) { return static_cast<u32>(system.GetCpuManager().CurrentCore()); } - const auto end = - register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads); - const auto it = std::find(register_host_thread_keys.begin(), end, this_id); - if (it == end) { - return Core::INVALID_HOST_THREAD_ID; - } - return register_host_thread_values[static_cast<size_t>( - std::distance(register_host_thread_keys.begin(), it))]; + return this_id; } - Core::EmuThreadHandle GetCurrentEmuThreadID() 
const { + [[nodiscard]] Core::EmuThreadHandle GetCurrentEmuThreadID() { Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); result.host_handle = GetCurrentHostThreadID(); if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { return result; } - const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); + const Kernel::KScheduler& sched = cores[result.host_handle].Scheduler(); const Kernel::Thread* current = sched.GetCurrentThread(); if (current != nullptr && !current->IsPhantomMode()) { result.guest_handle = current->GetGlobalHandle(); @@ -314,7 +306,7 @@ struct KernelCore::Impl { // Lists all processes that exist in the current session. std::vector<std::shared_ptr<Process>> process_list; Process* current_process = nullptr; - Kernel::GlobalScheduler global_scheduler; + std::unique_ptr<Kernel::GlobalSchedulerContext> global_scheduler_context; Kernel::Synchronization synchronization; Kernel::TimeManager time_manager; @@ -333,15 +325,8 @@ struct KernelCore::Impl { std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; std::vector<Kernel::PhysicalCore> cores; - // 0-3 IDs represent core threads, >3 represent others - std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; - - // Number of host threads is a relatively high number to avoid overflowing - static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64; - std::atomic<size_t> num_host_threads{0}; - std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS> - register_host_thread_keys{}; - std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{}; + // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others + std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES}; // Kernel memory management std::unique_ptr<Memory::MemoryManager> memory_manager; @@ -353,12 +338,19 @@ struct KernelCore::Impl { std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; std::shared_ptr<Kernel::SharedMemory>
time_shared_mem; + // Threads used for services + std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; + + // Service threads are managed by a worker thread, so that a calling service thread can queue up + // the release of itself + std::unique_ptr<Common::ThreadWorker> service_thread_manager; + std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; - std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; + std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; bool is_multicore{}; - std::thread::id single_core_thread_id{}; + u32 single_core_thread_id{}; std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; @@ -415,19 +407,19 @@ const std::vector<std::shared_ptr<Process>>& KernelCore::GetProcessList() const return impl->process_list; } -Kernel::GlobalScheduler& KernelCore::GlobalScheduler() { - return impl->global_scheduler; +Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() { + return *impl->global_scheduler_context; } -const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { - return impl->global_scheduler; +const Kernel::GlobalSchedulerContext& KernelCore::GlobalSchedulerContext() const { + return *impl->global_scheduler_context; } -Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { +Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) { return *impl->schedulers[id]; } -const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { +const Kernel::KScheduler& KernelCore::Scheduler(std::size_t id) const { return *impl->schedulers[id]; } @@ -451,16 +443,13 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { return impl->cores[core_id]; } -Kernel::Scheduler& KernelCore::CurrentScheduler() { +Kernel::KScheduler* KernelCore::CurrentScheduler() { u32 core_id = 
impl->GetCurrentHostThreadID(); - ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); - return *impl->schedulers[core_id]; -} - -const Kernel::Scheduler& KernelCore::CurrentScheduler() const { - u32 core_id = impl->GetCurrentHostThreadID(); - ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); - return *impl->schedulers[core_id]; + if (core_id >= Core::Hardware::NUM_CPU_CORES) { + // This is expected when called from not a guest thread + return {}; + } + return impl->schedulers[core_id].get(); } std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() { @@ -623,7 +612,7 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const { void KernelCore::Suspend(bool in_suspention) { const bool should_suspend = exception_exited || in_suspention; { - SchedulerLock lock(*this); + KScopedSchedulerLock lock(*this); ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep; for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { impl->suspend_threads[i]->SetStatus(status); @@ -650,4 +639,19 @@ void KernelCore::ExitSVCProfile() { MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); } +std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { + auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name); + impl->service_thread_manager->QueueWork( + [this, service_thread] { impl->service_threads.emplace(service_thread); }); + return service_thread; +} + +void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) { + impl->service_thread_manager->QueueWork([this, service_thread] { + if (auto strong_ptr = service_thread.lock()) { + impl->service_threads.erase(strong_ptr); + } + }); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index a73a93039..e3169f5a7 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -35,13 +35,14 
@@ class SlabHeap; class AddressArbiter; class ClientPort; -class GlobalScheduler; +class GlobalSchedulerContext; class HandleTable; class PhysicalCore; class Process; class ResourceLimit; -class Scheduler; +class KScheduler; class SharedMemory; +class ServiceThread; class Synchronization; class Thread; class TimeManager; @@ -102,16 +103,16 @@ public: const std::vector<std::shared_ptr<Process>>& GetProcessList() const; /// Gets the sole instance of the global scheduler - Kernel::GlobalScheduler& GlobalScheduler(); + Kernel::GlobalSchedulerContext& GlobalSchedulerContext(); /// Gets the sole instance of the global scheduler - const Kernel::GlobalScheduler& GlobalScheduler() const; + const Kernel::GlobalSchedulerContext& GlobalSchedulerContext() const; /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' - Kernel::Scheduler& Scheduler(std::size_t id); + Kernel::KScheduler& Scheduler(std::size_t id); /// Gets the sole instance of the Scheduler assoviated with cpu core 'id' - const Kernel::Scheduler& Scheduler(std::size_t id) const; + const Kernel::KScheduler& Scheduler(std::size_t id) const; /// Gets the an instance of the respective physical CPU core. Kernel::PhysicalCore& PhysicalCore(std::size_t id); @@ -120,10 +121,7 @@ public: const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; /// Gets the sole instance of the Scheduler at the current running core. - Kernel::Scheduler& CurrentScheduler(); - - /// Gets the sole instance of the Scheduler at the current running core. - const Kernel::Scheduler& CurrentScheduler() const; + Kernel::KScheduler* CurrentScheduler(); /// Gets the an instance of the current physical CPU core. Kernel::PhysicalCore& CurrentPhysicalCore(); @@ -230,6 +228,22 @@ public: void ExitSVCProfile(); + /** + * Creates an HLE service thread, which are used to execute service routines asynchronously. 
+ * While these are allocated per ServerSession, these need to be owned and managed outside of + * ServerSession to avoid a circular dependency. + * @param name String name for the ServerSession creating this thread, used for debug purposes. + * @returns A weak pointer to the newly created service thread. + */ + std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name); + + /** + * Releases an HLE service thread, instructing KernelCore to free it. This should be called when + * the ServerSession associated with the thread is destroyed. + * @param service_thread Service thread to release. + */ + void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread); + private: friend class Object; friend class Process; diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h index 9d7839d08..37fe19916 100644 --- a/src/core/hle/kernel/memory/memory_block.h +++ b/src/core/hle/kernel/memory/memory_block.h @@ -222,9 +222,9 @@ public: public: constexpr MemoryBlock() = default; - constexpr MemoryBlock(VAddr addr, std::size_t num_pages, MemoryState state, - MemoryPermission perm, MemoryAttribute attribute) - : addr{addr}, num_pages(num_pages), state{state}, perm{perm}, attribute{attribute} {} + constexpr MemoryBlock(VAddr addr_, std::size_t num_pages_, MemoryState state_, + MemoryPermission perm_, MemoryAttribute attribute_) + : addr{addr_}, num_pages(num_pages_), state{state_}, perm{perm_}, attribute{attribute_} {} constexpr VAddr GetAddress() const { return addr; diff --git a/src/core/hle/kernel/memory/memory_block_manager.h b/src/core/hle/kernel/memory/memory_block_manager.h index 6e1d41075..f57d1bbcc 100644 --- a/src/core/hle/kernel/memory/memory_block_manager.h +++ b/src/core/hle/kernel/memory/memory_block_manager.h @@ -57,8 +57,8 @@ public: private: void MergeAdjacent(iterator it, iterator& next_it); - const VAddr start_addr; - const VAddr end_addr; + [[maybe_unused]] const VAddr start_addr; +
[[maybe_unused]] const VAddr end_addr; MemoryBlockTree memory_block_tree; }; diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 8f6c944d1..4f8075e0e 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -11,11 +11,11 @@ #include "core/core.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/result.h" #include "core/memory.h" @@ -73,9 +73,9 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle, auto& kernel = system.Kernel(); std::shared_ptr<Thread> current_thread = - SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); + SharedFrom(kernel.CurrentScheduler()->GetCurrentThread()); { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); // The mutex address must be 4-byte aligned if ((address % sizeof(u32)) != 0) { return ERR_INVALID_ADDRESS; @@ -114,7 +114,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle, } { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); auto* owner = current_thread->GetLockOwner(); if (owner != nullptr) { owner->RemoveMutexWaiter(current_thread); @@ -153,10 +153,10 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr ResultCode Mutex::Release(VAddr address) { auto& kernel = system.Kernel(); - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); std::shared_ptr<Thread> current_thread = - SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); + SharedFrom(kernel.CurrentScheduler()->GetCurrentThread()); auto [result, new_owner] = Unlock(current_thread, address); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 
50aca5752..7fea45f96 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -7,14 +7,14 @@ #include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/core.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_core.h" -#include "core/hle/kernel/scheduler.h" namespace Kernel { PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, - Kernel::Scheduler& scheduler, Core::CPUInterrupts& interrupts) + Kernel::KScheduler& scheduler, Core::CPUInterrupts& interrupts) : core_index{core_index}, system{system}, scheduler{scheduler}, interrupts{interrupts}, guard{std::make_unique<Common::SpinLock>()} {} @@ -43,10 +43,6 @@ void PhysicalCore::Idle() { interrupts[core_index].AwaitInterrupt(); } -void PhysicalCore::Shutdown() { - scheduler.Shutdown(); -} - bool PhysicalCore::IsInterrupted() const { return interrupts[core_index].IsInterrupted(); } diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 37513130a..f2b0911aa 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -15,7 +15,7 @@ class SpinLock; } namespace Kernel { -class Scheduler; +class KScheduler; } // namespace Kernel namespace Core { @@ -28,7 +28,7 @@ namespace Kernel { class PhysicalCore { public: - PhysicalCore(std::size_t core_index, Core::System& system, Kernel::Scheduler& scheduler, + PhysicalCore(std::size_t core_index, Core::System& system, Kernel::KScheduler& scheduler, Core::CPUInterrupts& interrupts); ~PhysicalCore(); @@ -36,7 +36,7 @@ public: PhysicalCore& operator=(const PhysicalCore&) = delete; PhysicalCore(PhysicalCore&&) = default; - PhysicalCore& operator=(PhysicalCore&&) = default; + PhysicalCore& operator=(PhysicalCore&&) = delete; /// Initialize the core for the specified parameters. 
void Initialize(bool is_64_bit); @@ -55,9 +55,6 @@ public: /// Check if this core is interrupted bool IsInterrupted() const; - // Shutdown this physical core. - void Shutdown(); - bool IsInitialized() const { return arm_interface != nullptr; } @@ -82,18 +79,18 @@ public: return core_index; } - Kernel::Scheduler& Scheduler() { + Kernel::KScheduler& Scheduler() { return scheduler; } - const Kernel::Scheduler& Scheduler() const { + const Kernel::KScheduler& Scheduler() const { return scheduler; } private: const std::size_t core_index; Core::System& system; - Kernel::Scheduler& scheduler; + Kernel::KScheduler& scheduler; Core::CPUInterrupts& interrupts; std::unique_ptr<Common::SpinLock> guard; std::unique_ptr<Core::ARM_Interface> arm_interface; diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index b17529dee..b905b486a 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -15,13 +15,13 @@ #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/memory/memory_block_manager.h" #include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/memory/slab_heap.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/memory.h" @@ -54,7 +54,7 @@ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, auto& kernel = system.Kernel(); // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires { - SchedulerLock lock{kernel}; + KScopedSchedulerLock lock{kernel}; thread->SetStatus(ThreadStatus::Ready); } } @@ -213,7 +213,7 @@ void Process::UnregisterThread(const Thread* thread) { } ResultCode Process::ClearSignalState() { - SchedulerLock 
lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); if (status == ProcessStatus::Exited) { LOG_ERROR(Kernel, "called on a terminated process instance."); return ERR_INVALID_STATE; @@ -314,7 +314,7 @@ void Process::PrepareForTermination() { if (thread->GetOwnerProcess() != this) continue; - if (thread.get() == system.CurrentScheduler().GetCurrentThread()) + if (thread.get() == kernel.CurrentScheduler()->GetCurrentThread()) continue; // TODO(Subv): When are the other running/ready threads terminated? @@ -325,7 +325,7 @@ void Process::PrepareForTermination() { } }; - stop_threads(system.GlobalScheduler().GetThreadList()); + stop_threads(system.GlobalSchedulerContext().GetThreadList()); FreeTLSRegion(tls_region_address); tls_region_address = 0; @@ -347,7 +347,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) { } VAddr Process::CreateTLSRegion() { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; tls_page_iter != tls_pages.cend()) { return *tls_page_iter->ReserveSlot(); @@ -378,7 +378,7 @@ VAddr Process::CreateTLSRegion() { } void Process::FreeTLSRegion(VAddr tls_address) { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(system.Kernel()); const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); auto iter = std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index f45cb5674..e412e58aa 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -216,6 +216,16 @@ public: total_process_running_time_ticks += ticks; } + /// Gets the process schedule count, used for thread yielding + s64 GetScheduledCount() const { + return schedule_count; + } + + /// Increments the process schedule count, used for thread yielding.
+ void IncrementScheduledCount() { + ++schedule_count; + } + /// Gets 8 bytes of random data for svcGetInfo RandomEntropy u64 GetRandomEntropy(std::size_t index) const { return random_entropy.at(index); @@ -397,6 +407,9 @@ private: /// Name of this process std::string name; + /// Schedule count of this process + s64 schedule_count{}; + /// System context Core::System& system; }; diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 63880f13d..0f128c586 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp @@ -199,7 +199,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s break; } - LOG_ERROR(Kernel, "Invalid capability type! type={}", static_cast<u32>(type)); + LOG_ERROR(Kernel, "Invalid capability type! type={}", type); return ERR_INVALID_CAPABILITY_DESCRIPTOR; } diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index 6e286419e..cea262ce0 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -6,10 +6,10 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/readable_event.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" namespace Kernel { @@ -39,7 +39,7 @@ void ReadableEvent::Clear() { } ResultCode ReadableEvent::Reset() { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (!is_signaled) { LOG_TRACE(Kernel, "Handle is not signaled! 
object_id={}, object_type={}, object_name={}", GetObjectId(), GetTypeName(), GetName()); diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index 212e442f4..7bf50339d 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -65,8 +65,8 @@ ResultCode ResourceLimit::SetLimitValue(ResourceType resource, s64 value) { limit[index] = value; return RESULT_SUCCESS; } else { - LOG_ERROR(Kernel, "Limit value is too large! resource={}, value={}, index={}", - static_cast<u32>(resource), value, index); + LOG_ERROR(Kernel, "Limit value is too large! resource={}, value={}, index={}", resource, + value, index); return ERR_INVALID_STATE; } } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp deleted file mode 100644 index 5c63b0b4a..000000000 --- a/src/core/hle/kernel/scheduler.cpp +++ /dev/null @@ -1,819 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. -// -// SelectThreads, Yield functions originally by TuxSH. -// licensed under GPLv2 or later under exception provided by the author. 
- -#include <algorithm> -#include <mutex> -#include <set> -#include <unordered_set> -#include <utility> - -#include "common/assert.h" -#include "common/bit_util.h" -#include "common/fiber.h" -#include "common/logging/log.h" -#include "core/arm/arm_interface.h" -#include "core/core.h" -#include "core/core_timing.h" -#include "core/cpu_manager.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/physical_core.h" -#include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/time_manager.h" - -namespace Kernel { - -GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {} - -GlobalScheduler::~GlobalScheduler() = default; - -void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) { - std::scoped_lock lock{global_list_guard}; - thread_list.push_back(std::move(thread)); -} - -void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) { - std::scoped_lock lock{global_list_guard}; - thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), - thread_list.end()); -} - -u32 GlobalScheduler::SelectThreads() { - ASSERT(is_locked); - const auto update_thread = [](Thread* thread, Scheduler& sched) { - std::scoped_lock lock{sched.guard}; - if (thread != sched.selected_thread_set.get()) { - if (thread == nullptr) { - ++sched.idle_selection_count; - } - sched.selected_thread_set = SharedFrom(thread); - } - const bool reschedule_pending = - sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread); - sched.is_context_switch_pending = reschedule_pending; - std::atomic_thread_fence(std::memory_order_seq_cst); - return reschedule_pending; - }; - if (!is_reselection_pending.load()) { - return 0; - } - std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{}; - - u32 idle_cores{}; - - // Step 1: Get top thread in schedule queue. 
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - Thread* top_thread = - scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); - if (top_thread != nullptr) { - // TODO(Blinkhawk): Implement Thread Pinning - } else { - idle_cores |= (1U << core); - } - top_threads[core] = top_thread; - } - - while (idle_cores != 0) { - u32 core_id = Common::CountTrailingZeroes32(idle_cores); - - if (!suggested_queue[core_id].empty()) { - std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{}; - std::size_t num_candidates = 0; - auto iter = suggested_queue[core_id].begin(); - Thread* suggested = nullptr; - // Step 2: Try selecting a suggested thread. - while (iter != suggested_queue[core_id].end()) { - suggested = *iter; - iter++; - s32 suggested_core_id = suggested->GetProcessorID(); - Thread* top_thread = - suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr; - if (top_thread != suggested) { - if (top_thread != nullptr && - top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) { - suggested = nullptr; - break; - // There's a too high thread to do core migration, cancel - } - TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested); - break; - } - suggested = nullptr; - migration_candidates[num_candidates++] = suggested_core_id; - } - // Step 3: Select a suggested thread from another core - if (suggested == nullptr) { - for (std::size_t i = 0; i < num_candidates; i++) { - s32 candidate_core = migration_candidates[i]; - suggested = top_threads[candidate_core]; - auto it = scheduled_queue[candidate_core].begin(); - it++; - Thread* next = it != scheduled_queue[candidate_core].end() ? 
*it : nullptr; - if (next != nullptr) { - TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), - suggested); - top_threads[candidate_core] = next; - break; - } else { - suggested = nullptr; - } - } - } - top_threads[core_id] = suggested; - } - - idle_cores &= ~(1U << core_id); - } - u32 cores_needing_context_switch{}; - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - Scheduler& sched = kernel.Scheduler(core); - ASSERT(top_threads[core] == nullptr || - static_cast<u32>(top_threads[core]->GetProcessorID()) == core); - if (update_thread(top_threads[core], sched)) { - cores_needing_context_switch |= (1U << core); - } - } - return cores_needing_context_switch; -} - -bool GlobalScheduler::YieldThread(Thread* yielding_thread) { - ASSERT(is_locked); - // Note: caller should use critical section, etc. - if (!yielding_thread->IsRunnable()) { - // Normally this case shouldn't happen except for SetThreadActivity. - is_reselection_pending.store(true, std::memory_order_release); - return false; - } - const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); - const u32 priority = yielding_thread->GetPriority(); - - // Yield the thread - Reschedule(priority, core_id, yielding_thread); - const Thread* const winner = scheduled_queue[core_id].front(); - if (kernel.GetCurrentHostThreadID() != core_id) { - is_reselection_pending.store(true, std::memory_order_release); - } - - return AskForReselectionOrMarkRedundant(yielding_thread, winner); -} - -bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { - ASSERT(is_locked); - // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, - // etc. - if (!yielding_thread->IsRunnable()) { - // Normally this case shouldn't happen except for SetThreadActivity. 
- is_reselection_pending.store(true, std::memory_order_release); - return false; - } - const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); - const u32 priority = yielding_thread->GetPriority(); - - // Yield the thread - Reschedule(priority, core_id, yielding_thread); - - std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; - for (std::size_t i = 0; i < current_threads.size(); i++) { - current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); - } - - Thread* next_thread = scheduled_queue[core_id].front(priority); - Thread* winner = nullptr; - for (auto& thread : suggested_queue[core_id]) { - const s32 source_core = thread->GetProcessorID(); - if (source_core >= 0) { - if (current_threads[source_core] != nullptr) { - if (thread == current_threads[source_core] || - current_threads[source_core]->GetPriority() < min_regular_priority) { - continue; - } - } - } - if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || - next_thread->GetPriority() < thread->GetPriority()) { - if (thread->GetPriority() <= priority) { - winner = thread; - break; - } - } - } - - if (winner != nullptr) { - if (winner != yielding_thread) { - TransferToCore(winner->GetPriority(), s32(core_id), winner); - } - } else { - winner = next_thread; - } - - if (kernel.GetCurrentHostThreadID() != core_id) { - is_reselection_pending.store(true, std::memory_order_release); - } - - return AskForReselectionOrMarkRedundant(yielding_thread, winner); -} - -bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { - ASSERT(is_locked); - // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, - // etc. - if (!yielding_thread->IsRunnable()) { - // Normally this case shouldn't happen except for SetThreadActivity. 
- is_reselection_pending.store(true, std::memory_order_release); - return false; - } - Thread* winner = nullptr; - const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); - - // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead - TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread); - - // If the core is idle, perform load balancing, excluding the threads that have just used this - // function... - if (scheduled_queue[core_id].empty()) { - // Here, "current_threads" is calculated after the ""yield"", unlike yield -1 - std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads; - for (std::size_t i = 0; i < current_threads.size(); i++) { - current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); - } - for (auto& thread : suggested_queue[core_id]) { - const s32 source_core = thread->GetProcessorID(); - if (source_core < 0 || thread == current_threads[source_core]) { - continue; - } - if (current_threads[source_core] == nullptr || - current_threads[source_core]->GetPriority() >= min_regular_priority) { - winner = thread; - } - break; - } - if (winner != nullptr) { - if (winner != yielding_thread) { - TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner); - } - } else { - winner = yielding_thread; - } - } else { - winner = scheduled_queue[core_id].front(); - } - - if (kernel.GetCurrentHostThreadID() != core_id) { - is_reselection_pending.store(true, std::memory_order_release); - } - - return AskForReselectionOrMarkRedundant(yielding_thread, winner); -} - -void GlobalScheduler::PreemptThreads() { - ASSERT(is_locked); - for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { - const u32 priority = preemption_priorities[core_id]; - - if (scheduled_queue[core_id].size(priority) > 0) { - if (scheduled_queue[core_id].size(priority) > 1) { - scheduled_queue[core_id].front(priority)->IncrementYieldCount(); - } - 
scheduled_queue[core_id].yield(priority); - if (scheduled_queue[core_id].size(priority) > 1) { - scheduled_queue[core_id].front(priority)->IncrementYieldCount(); - } - } - - Thread* current_thread = - scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front(); - Thread* winner = nullptr; - for (auto& thread : suggested_queue[core_id]) { - const s32 source_core = thread->GetProcessorID(); - if (thread->GetPriority() != priority) { - continue; - } - if (source_core >= 0) { - Thread* next_thread = scheduled_queue[source_core].empty() - ? nullptr - : scheduled_queue[source_core].front(); - if (next_thread != nullptr && next_thread->GetPriority() < 2) { - break; - } - if (next_thread == thread) { - continue; - } - } - if (current_thread != nullptr && - current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { - winner = thread; - break; - } - } - - if (winner != nullptr) { - TransferToCore(winner->GetPriority(), s32(core_id), winner); - current_thread = - winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; - } - - if (current_thread != nullptr && current_thread->GetPriority() > priority) { - for (auto& thread : suggested_queue[core_id]) { - const s32 source_core = thread->GetProcessorID(); - if (thread->GetPriority() < priority) { - continue; - } - if (source_core >= 0) { - Thread* next_thread = scheduled_queue[source_core].empty() - ? 
nullptr - : scheduled_queue[source_core].front(); - if (next_thread != nullptr && next_thread->GetPriority() < 2) { - break; - } - if (next_thread == thread) { - continue; - } - } - if (current_thread != nullptr && - current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { - winner = thread; - break; - } - } - - if (winner != nullptr) { - TransferToCore(winner->GetPriority(), s32(core_id), winner); - current_thread = winner; - } - } - - is_reselection_pending.store(true, std::memory_order_release); - } -} - -void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule, - Core::EmuThreadHandle global_thread) { - u32 current_core = global_thread.host_handle; - bool must_context_switch = global_thread.guest_handle != InvalidHandle && - (current_core < Core::Hardware::NUM_CPU_CORES); - while (cores_pending_reschedule != 0) { - u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule); - ASSERT(core < Core::Hardware::NUM_CPU_CORES); - if (!must_context_switch || core != current_core) { - auto& phys_core = kernel.PhysicalCore(core); - phys_core.Interrupt(); - } else { - must_context_switch = true; - } - cores_pending_reschedule &= ~(1U << core); - } - if (must_context_switch) { - auto& core_scheduler = kernel.CurrentScheduler(); - kernel.ExitSVCProfile(); - core_scheduler.TryDoContextSwitch(); - kernel.EnterSVCProfile(); - } -} - -void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - suggested_queue[core].add(thread, priority); -} - -void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - suggested_queue[core].remove(thread, priority); -} - -void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority); -} - -void GlobalScheduler::SchedulePrepend(u32 
priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority, false); -} - -void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - scheduled_queue[core].remove(thread, priority); - scheduled_queue[core].add(thread, priority); -} - -void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) { - ASSERT(is_locked); - scheduled_queue[core].remove(thread, priority); -} - -void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { - ASSERT(is_locked); - const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; - const s32 source_core = thread->GetProcessorID(); - if (source_core == destination_core || !schedulable) { - return; - } - thread->SetProcessorID(destination_core); - if (source_core >= 0) { - Unschedule(priority, static_cast<u32>(source_core), thread); - } - if (destination_core >= 0) { - Unsuggest(priority, static_cast<u32>(destination_core), thread); - Schedule(priority, static_cast<u32>(destination_core), thread); - } - if (source_core >= 0) { - Suggest(priority, static_cast<u32>(source_core), thread); - } -} - -bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, - const Thread* winner) { - if (current_thread == winner) { - current_thread->IncrementYieldCount(); - return true; - } else { - is_reselection_pending.store(true, std::memory_order_release); - return false; - } -} - -void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) { - if (old_flags == thread->scheduling_state) { - return; - } - ASSERT(is_locked); - - if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) { - // In this case the thread was running, now it's pausing/exitting - if (thread->processor_id >= 0) { - Unschedule(thread->current_priority, 
static_cast<u32>(thread->processor_id), thread); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(thread->processor_id) && - ((thread->affinity_mask >> core) & 1) != 0) { - Unsuggest(thread->current_priority, core, thread); - } - } - } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { - // The thread is now set to running from being stopped - if (thread->processor_id >= 0) { - Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(thread->processor_id) && - ((thread->affinity_mask >> core) & 1) != 0) { - Suggest(thread->current_priority, core, thread); - } - } - } - - SetReselectionPending(); -} - -void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) { - if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) { - return; - } - ASSERT(is_locked); - if (thread->processor_id >= 0) { - Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(thread->processor_id) && - ((thread->affinity_mask >> core) & 1) != 0) { - Unsuggest(old_priority, core, thread); - } - } - - if (thread->processor_id >= 0) { - if (thread == kernel.CurrentScheduler().GetCurrentThread()) { - SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id), - thread); - } else { - Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast<u32>(thread->processor_id) && - ((thread->affinity_mask >> core) & 1) != 0) { - Suggest(thread->current_priority, core, thread); - } - } - thread->IncrementYieldCount(); - SetReselectionPending(); -} - -void 
GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, - s32 old_core) { - if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) || - thread->current_priority >= THREADPRIO_COUNT) { - return; - } - ASSERT(is_locked); - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((old_affinity_mask >> core) & 1) != 0) { - if (core == static_cast<u32>(old_core)) { - Unschedule(thread->current_priority, core, thread); - } else { - Unsuggest(thread->current_priority, core, thread); - } - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((thread->affinity_mask >> core) & 1) != 0) { - if (core == static_cast<u32>(thread->processor_id)) { - Schedule(thread->current_priority, core, thread); - } else { - Suggest(thread->current_priority, core, thread); - } - } - } - - thread->IncrementYieldCount(); - SetReselectionPending(); -} - -void GlobalScheduler::Shutdown() { - for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - scheduled_queue[core].clear(); - suggested_queue[core].clear(); - } - thread_list.clear(); -} - -void GlobalScheduler::Lock() { - Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); - ASSERT(!current_thread.IsInvalid()); - if (current_thread == current_owner) { - ++scope_lock; - } else { - inner_lock.lock(); - is_locked = true; - current_owner = current_thread; - ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle()); - scope_lock = 1; - } -} - -void GlobalScheduler::Unlock() { - if (--scope_lock != 0) { - ASSERT(scope_lock > 0); - return; - } - u32 cores_pending_reschedule = SelectThreads(); - Core::EmuThreadHandle leaving_thread = current_owner; - current_owner = Core::EmuThreadHandle::InvalidHandle(); - scope_lock = 1; - is_locked = false; - inner_lock.unlock(); - EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread); -} - -Scheduler::Scheduler(Core::System& system, std::size_t core_id) : 
system(system), core_id(core_id) { - switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this); -} - -Scheduler::~Scheduler() = default; - -bool Scheduler::HaveReadyThreads() const { - return system.GlobalScheduler().HaveReadyThreads(core_id); -} - -Thread* Scheduler::GetCurrentThread() const { - if (current_thread) { - return current_thread.get(); - } - return idle_thread.get(); -} - -Thread* Scheduler::GetSelectedThread() const { - return selected_thread.get(); -} - -u64 Scheduler::GetLastContextSwitchTicks() const { - return last_context_switch_time; -} - -void Scheduler::TryDoContextSwitch() { - auto& phys_core = system.Kernel().CurrentPhysicalCore(); - if (phys_core.IsInterrupted()) { - phys_core.ClearInterrupt(); - } - guard.lock(); - if (is_context_switch_pending) { - SwitchContext(); - } else { - guard.unlock(); - } -} - -void Scheduler::OnThreadStart() { - SwitchContextStep2(); -} - -void Scheduler::Unload(Thread* thread) { - if (thread) { - thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); - thread->SetIsRunning(false); - if (thread->IsContinuousOnSVC() && !thread->IsHLEThread()) { - system.ArmInterface(core_id).ExceptionalExit(); - thread->SetContinuousOnSVC(false); - } - if (!thread->IsHLEThread() && !thread->HasExited()) { - Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); - cpu_core.SaveContext(thread->GetContext32()); - cpu_core.SaveContext(thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. 
- thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - } - thread->context_guard.unlock(); - } -} - -void Scheduler::Unload() { - Unload(current_thread.get()); -} - -void Scheduler::Reload(Thread* thread) { - if (thread) { - ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, - "Thread must be runnable."); - - // Cancel any outstanding wakeup events for this thread - thread->SetIsRunning(true); - thread->SetWasRunning(false); - thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); - - auto* const thread_owner_process = thread->GetOwnerProcess(); - if (thread_owner_process != nullptr) { - system.Kernel().MakeCurrentProcess(thread_owner_process); - } - if (!thread->IsHLEThread()) { - Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); - cpu_core.LoadContext(thread->GetContext32()); - cpu_core.LoadContext(thread->GetContext64()); - cpu_core.SetTlsAddress(thread->GetTLSAddress()); - cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); - cpu_core.ClearExclusiveState(); - } - } -} - -void Scheduler::Reload() { - Reload(current_thread.get()); -} - -void Scheduler::SwitchContextStep2() { - // Load context of new thread - Reload(selected_thread.get()); - - TryDoContextSwitch(); -} - -void Scheduler::SwitchContext() { - current_thread_prev = current_thread; - selected_thread = selected_thread_set; - Thread* previous_thread = current_thread_prev.get(); - Thread* new_thread = selected_thread.get(); - current_thread = selected_thread; - - is_context_switch_pending = false; - - if (new_thread == previous_thread) { - guard.unlock(); - return; - } - - Process* const previous_process = system.Kernel().CurrentProcess(); - - UpdateLastContextSwitchTime(previous_thread, previous_process); - - // Save context for previous thread - Unload(previous_thread); - - std::shared_ptr<Common::Fiber>* old_context; - if (previous_thread != nullptr) { - old_context = &previous_thread->GetHostContext(); - } else { - old_context = 
&idle_thread->GetHostContext(); - } - guard.unlock(); - - Common::Fiber::YieldTo(*old_context, switch_fiber); - /// When a thread wakes up, the scheduler may have changed to other in another core. - auto& next_scheduler = system.Kernel().CurrentScheduler(); - next_scheduler.SwitchContextStep2(); -} - -void Scheduler::OnSwitch(void* this_scheduler) { - Scheduler* sched = static_cast<Scheduler*>(this_scheduler); - sched->SwitchToCurrent(); -} - -void Scheduler::SwitchToCurrent() { - while (true) { - { - std::scoped_lock lock{guard}; - selected_thread = selected_thread_set; - current_thread = selected_thread; - is_context_switch_pending = false; - } - const auto is_switch_pending = [this] { - std::scoped_lock lock{guard}; - return is_context_switch_pending; - }; - do { - if (current_thread != nullptr && !current_thread->IsHLEThread()) { - current_thread->context_guard.lock(); - if (!current_thread->IsRunnable()) { - current_thread->context_guard.unlock(); - break; - } - if (static_cast<u32>(current_thread->GetProcessorID()) != core_id) { - current_thread->context_guard.unlock(); - break; - } - } - std::shared_ptr<Common::Fiber>* next_context; - if (current_thread != nullptr) { - next_context = ¤t_thread->GetHostContext(); - } else { - next_context = &idle_thread->GetHostContext(); - } - Common::Fiber::YieldTo(switch_fiber, *next_context); - } while (!is_switch_pending()); - } -} - -void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { - const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks(); - const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; - - if (thread != nullptr) { - thread->UpdateCPUTimeTicks(update_ticks); - } - - if (process != nullptr) { - process->UpdateCPUTimeTicks(update_ticks); - } - - last_context_switch_time = most_recent_switch_ticks; -} - -void Scheduler::Initialize() { - std::string name = "Idle Thread Id:" + 
std::to_string(core_id); - std::function<void(void*)> init_func = Core::CpuManager::GetIdleThreadStartFunc(); - void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); - ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE); - auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0, - nullptr, std::move(init_func), init_func_parameter); - idle_thread = std::move(thread_res).Unwrap(); -} - -void Scheduler::Shutdown() { - current_thread = nullptr; - selected_thread = nullptr; -} - -SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} { - kernel.GlobalScheduler().Lock(); -} - -SchedulerLock::~SchedulerLock() { - kernel.GlobalScheduler().Unlock(); -} - -SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, - Thread* time_task, s64 nanoseconds) - : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{ - nanoseconds} { - event_handle = InvalidHandle; -} - -SchedulerLockAndSleep::~SchedulerLockAndSleep() { - if (sleep_cancelled) { - return; - } - auto& time_manager = kernel.TimeManager(); - time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); -} - -void SchedulerLockAndSleep::Release() { - if (sleep_cancelled) { - return; - } - auto& time_manager = kernel.TimeManager(); - time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); - sleep_cancelled = true; -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h deleted file mode 100644 index 68db4a5ef..000000000 --- a/src/core/hle/kernel/scheduler.h +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <atomic> -#include <memory> -#include <mutex> -#include <vector> - -#include "common/common_types.h" -#include "common/multi_level_queue.h" -#include "common/spin_lock.h" -#include "core/hardware_properties.h" -#include "core/hle/kernel/thread.h" - -namespace Common { -class Fiber; -} - -namespace Core { -class ARM_Interface; -class System; -} // namespace Core - -namespace Kernel { - -class KernelCore; -class Process; -class SchedulerLock; - -class GlobalScheduler final { -public: - explicit GlobalScheduler(KernelCore& kernel); - ~GlobalScheduler(); - - /// Adds a new thread to the scheduler - void AddThread(std::shared_ptr<Thread> thread); - - /// Removes a thread from the scheduler - void RemoveThread(std::shared_ptr<Thread> thread); - - /// Returns a list of all threads managed by the scheduler - const std::vector<std::shared_ptr<Thread>>& GetThreadList() const { - return thread_list; - } - - /// Notify the scheduler a thread's status has changed. - void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags); - - /// Notify the scheduler a thread's priority has changed. - void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority); - - /// Notify the scheduler a thread's core and/or affinity mask has changed. - void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core); - - /** - * Takes care of selecting the new scheduled threads in three steps: - * - * 1. First a thread is selected from the top of the priority queue. If no thread - * is obtained then we move to step two, else we are done. - * - * 2. Second we try to get a suggested thread that's not assigned to any core or - * that is not the top thread in that core. - * - * 3. Third is no suggested thread is found, we do a second pass and pick a running - * thread in another core and swap it with its current thread. - * - * returns the cores needing scheduling. 
- */ - u32 SelectThreads(); - - bool HaveReadyThreads(std::size_t core_id) const { - return !scheduled_queue[core_id].empty(); - } - - /** - * Takes a thread and moves it to the back of the it's priority list. - * - * @note This operation can be redundant and no scheduling is changed if marked as so. - */ - bool YieldThread(Thread* thread); - - /** - * Takes a thread and moves it to the back of the it's priority list. - * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or - * a better priority than the next thread in the core. - * - * @note This operation can be redundant and no scheduling is changed if marked as so. - */ - bool YieldThreadAndBalanceLoad(Thread* thread); - - /** - * Takes a thread and moves it out of the scheduling queue. - * and into the suggested queue. If no thread can be scheduled afterwards in that core, - * a suggested thread is obtained instead. - * - * @note This operation can be redundant and no scheduling is changed if marked as so. - */ - bool YieldThreadAndWaitForLoadBalancing(Thread* thread); - - /** - * Rotates the scheduling queues of threads at a preemption priority and then does - * some core rebalancing. Preemption priorities can be found in the array - * 'preemption_priorities'. - * - * @note This operation happens every 10ms. - */ - void PreemptThreads(); - - u32 CpuCoresCount() const { - return Core::Hardware::NUM_CPU_CORES; - } - - void SetReselectionPending() { - is_reselection_pending.store(true, std::memory_order_release); - } - - bool IsReselectionPending() const { - return is_reselection_pending.load(std::memory_order_acquire); - } - - void Shutdown(); - -private: - friend class SchedulerLock; - - /// Lock the scheduler to the current thread. - void Lock(); - - /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling - /// and reschedules current core if needed. 
- void Unlock(); - - void EnableInterruptAndSchedule(u32 cores_pending_reschedule, - Core::EmuThreadHandle global_thread); - - /** - * Add a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Suggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Remove a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Unsuggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * back the queue in its priority level. - */ - void Schedule(u32 priority, std::size_t core, Thread* thread); - - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * front the queue in its priority level. - */ - void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); - - /// Reschedule an already scheduled thread based on a new priority - void Reschedule(u32 priority, std::size_t core, Thread* thread); - - /// Unschedules a thread. - void Unschedule(u32 priority, std::size_t core, Thread* thread); - - /** - * Transfers a thread into an specific core. If the destination_core is -1 - * it will be unscheduled from its source code and added into its suggested - * queue. - */ - void TransferToCore(u32 priority, s32 destination_core, Thread* thread); - - bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner); - - static constexpr u32 min_regular_priority = 2; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> - scheduled_queue; - std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES> - suggested_queue; - std::atomic<bool> is_reselection_pending{false}; - - // The priority levels at which the global scheduler preempts threads every 10 ms. 
They are - // ordered from Core 0 to Core 3. - std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; - - /// Scheduler lock mechanisms. - bool is_locked{}; - std::mutex inner_lock; - std::atomic<s64> scope_lock{}; - Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; - - Common::SpinLock global_list_guard{}; - - /// Lists all thread ids that aren't deleted/etc. - std::vector<std::shared_ptr<Thread>> thread_list; - KernelCore& kernel; -}; - -class Scheduler final { -public: - explicit Scheduler(Core::System& system, std::size_t core_id); - ~Scheduler(); - - /// Returns whether there are any threads that are ready to run. - bool HaveReadyThreads() const; - - /// Reschedules to the next available thread (call after current thread is suspended) - void TryDoContextSwitch(); - - /// The next two are for SingleCore Only. - /// Unload current thread before preempting core. - void Unload(Thread* thread); - void Unload(); - /// Reload current thread after core preemption. - void Reload(Thread* thread); - void Reload(); - - /// Gets the current running thread - Thread* GetCurrentThread() const; - - /// Gets the currently selected thread from the top of the multilevel queue - Thread* GetSelectedThread() const; - - /// Gets the timestamp for the last context switch in ticks. - u64 GetLastContextSwitchTicks() const; - - bool ContextSwitchPending() const { - return is_context_switch_pending; - } - - void Initialize(); - - /// Shutdowns the scheduler. 
- void Shutdown(); - - void OnThreadStart(); - - std::shared_ptr<Common::Fiber>& ControlContext() { - return switch_fiber; - } - - const std::shared_ptr<Common::Fiber>& ControlContext() const { - return switch_fiber; - } - -private: - friend class GlobalScheduler; - - /// Switches the CPU's active thread context to that of the specified thread - void SwitchContext(); - - /// When a thread wakes up, it must run this through it's new scheduler - void SwitchContextStep2(); - - /** - * Called on every context switch to update the internal timestamp - * This also updates the running time ticks for the given thread and - * process using the following difference: - * - * ticks += most_recent_ticks - last_context_switch_ticks - * - * The internal tick timestamp for the scheduler is simply the - * most recent tick count retrieved. No special arithmetic is - * applied to it. - */ - void UpdateLastContextSwitchTime(Thread* thread, Process* process); - - static void OnSwitch(void* this_scheduler); - void SwitchToCurrent(); - - std::shared_ptr<Thread> current_thread = nullptr; - std::shared_ptr<Thread> selected_thread = nullptr; - std::shared_ptr<Thread> current_thread_prev = nullptr; - std::shared_ptr<Thread> selected_thread_set = nullptr; - std::shared_ptr<Thread> idle_thread = nullptr; - - std::shared_ptr<Common::Fiber> switch_fiber = nullptr; - - Core::System& system; - u64 last_context_switch_time = 0; - u64 idle_selection_count = 0; - const std::size_t core_id; - - Common::SpinLock guard{}; - - bool is_context_switch_pending = false; -}; - -class SchedulerLock { -public: - [[nodiscard]] explicit SchedulerLock(KernelCore& kernel); - ~SchedulerLock(); - -protected: - KernelCore& kernel; -}; - -class SchedulerLockAndSleep : public SchedulerLock { -public: - explicit SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* time_task, - s64 nanoseconds); - ~SchedulerLockAndSleep(); - - void CancelSleep() { - sleep_cancelled = true; - } - - void Release(); - 
-private: - Handle& event_handle; - Thread* time_task; - s64 nanoseconds; - bool sleep_cancelled{}; -}; - -} // namespace Kernel diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 8c19f2534..b40fe3916 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -14,9 +14,9 @@ #include "core/hle/kernel/client_session.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/hle_ipc.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/session.h" #include "core/hle/kernel/thread.h" @@ -25,19 +25,19 @@ namespace Kernel { ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} -ServerSession::~ServerSession() = default; + +ServerSession::~ServerSession() { + kernel.ReleaseServiceThread(service_thread); +} ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, std::shared_ptr<Session> parent, std::string name) { std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; - session->request_event = - Core::Timing::CreateEvent(name, [session](std::uintptr_t, std::chrono::nanoseconds) { - session->CompleteSyncRequest(); - }); session->name = std::move(name); session->parent = std::move(parent); + session->service_thread = kernel.CreateServiceThread(session->name); return MakeResult(std::move(session)); } @@ -130,8 +130,7 @@ ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& con } } - LOG_CRITICAL(IPC, "Unknown domain command={}", - static_cast<int>(domain_message_header.command.Value())); + LOG_CRITICAL(IPC, "Unknown domain command={}", domain_message_header.command.Value()); ASSERT(false); return RESULT_SUCCESS; } @@ -143,16 +142,16 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> 
thread, std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); - request_queue.Push(std::move(context)); + + if (auto strong_ptr = service_thread.lock()) { + strong_ptr->QueueSyncRequest(*this, std::move(context)); + return RESULT_SUCCESS; + } return RESULT_SUCCESS; } -ResultCode ServerSession::CompleteSyncRequest() { - ASSERT(!request_queue.Empty()); - - auto& context = *request_queue.Front(); - +ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) { ResultCode result = RESULT_SUCCESS; // If the session has been converted to a domain, handle the domain request if (IsDomain() && context.HasDomainMessageHeader()) { @@ -171,25 +170,20 @@ ResultCode ServerSession::CompleteSyncRequest() { // Some service requests require the thread to block { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (!context.IsThreadWaiting()) { context.GetThread().ResumeFromWait(); context.GetThread().SetSynchronizationResults(nullptr, result); } } - request_queue.Pop(); - return result; } ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory, Core::Timing::CoreTiming& core_timing) { - const ResultCode result = QueueSyncRequest(std::move(thread), memory); - const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 
0 : 20000}; - core_timing.ScheduleEvent(delay, request_event, {}); - return result; + return QueueSyncRequest(std::move(thread), memory); } } // namespace Kernel diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index d23e9ec68..e8d1d99ea 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -10,6 +10,7 @@ #include <vector> #include "common/threadsafe_queue.h" +#include "core/hle/kernel/service_thread.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" @@ -43,6 +44,8 @@ class Thread; * TLS buffer and control is transferred back to it. */ class ServerSession final : public SynchronizationObject { + friend class ServiceThread; + public: explicit ServerSession(KernelCore& kernel); ~ServerSession() override; @@ -132,7 +135,7 @@ private: ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); /// Completes a sync request from the emulated application. - ResultCode CompleteSyncRequest(); + ResultCode CompleteSyncRequest(HLERequestContext& context); /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an /// object handle. 
@@ -163,11 +166,8 @@ private: /// The name of this session (optional) std::string name; - /// Core timing event used to schedule the service request at some point in the future - std::shared_ptr<Core::Timing::EventType> request_event; - - /// Queue of scheduled service requests - Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue; + /// Thread to dispatch service requests + std::weak_ptr<ServiceThread> service_thread; }; } // namespace Kernel diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp new file mode 100644 index 000000000..ee46f3e21 --- /dev/null +++ b/src/core/hle/kernel/service_thread.cpp @@ -0,0 +1,110 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <condition_variable> +#include <functional> +#include <mutex> +#include <thread> +#include <vector> +#include <queue> + +#include "common/assert.h" +#include "common/scope_exit.h" +#include "common/thread.h" +#include "core/core.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/server_session.h" +#include "core/hle/kernel/service_thread.h" +#include "core/hle/lock.h" +#include "video_core/renderer_base.h" + +namespace Kernel { + +class ServiceThread::Impl final { +public: + explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name); + ~Impl(); + + void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); + +private: + std::vector<std::thread> threads; + std::queue<std::function<void()>> requests; + std::mutex queue_mutex; + std::condition_variable condition; + const std::string service_name; + bool stop{}; +}; + +ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name) + : service_name{name} { + for (std::size_t i = 0; i < num_threads; ++i) + threads.emplace_back([this, &kernel] { + 
Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str()); + + // Wait for first request before trying to acquire a render context + { + std::unique_lock lock{queue_mutex}; + condition.wait(lock, [this] { return stop || !requests.empty(); }); + } + + kernel.RegisterHostThread(); + + while (true) { + std::function<void()> task; + + { + std::unique_lock lock{queue_mutex}; + condition.wait(lock, [this] { return stop || !requests.empty(); }); + if (stop || requests.empty()) { + return; + } + task = std::move(requests.front()); + requests.pop(); + } + + task(); + } + }); +} + +void ServiceThread::Impl::QueueSyncRequest(ServerSession& session, + std::shared_ptr<HLERequestContext>&& context) { + { + std::unique_lock lock{queue_mutex}; + + // ServerSession owns the service thread, so we cannot caption a strong pointer here in the + // event that the ServerSession is terminated. + std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)}; + requests.emplace([weak_ptr, context{std::move(context)}]() { + if (auto strong_ptr = weak_ptr.lock()) { + strong_ptr->CompleteSyncRequest(*context); + } + }); + } + condition.notify_one(); +} + +ServiceThread::Impl::~Impl() { + { + std::unique_lock lock{queue_mutex}; + stop = true; + } + condition.notify_all(); + for (std::thread& thread : threads) { + thread.join(); + } +} + +ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name) + : impl{std::make_unique<Impl>(kernel, num_threads, name)} {} + +ServiceThread::~ServiceThread() = default; + +void ServiceThread::QueueSyncRequest(ServerSession& session, + std::shared_ptr<HLERequestContext>&& context) { + impl->QueueSyncRequest(session, std::move(context)); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/service_thread.h b/src/core/hle/kernel/service_thread.h new file mode 100644 index 000000000..025ab8fb5 --- /dev/null +++ b/src/core/hle/kernel/service_thread.h @@ -0,0 +1,28 @@ +// Copyright 2020 yuzu 
emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <string> + +namespace Kernel { + +class HLERequestContext; +class KernelCore; +class ServerSession; + +class ServiceThread final { +public: + explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name); + ~ServiceThread(); + + void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context); + +private: + class Impl; + std::unique_ptr<Impl> impl; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 95d6e2b4d..de3ed25da 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -24,6 +24,8 @@ #include "core/hle/kernel/client_session.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/memory/memory_block.h" #include "core/hle/kernel/memory/page_table.h" @@ -32,7 +34,6 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/resource_limit.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/svc_types.h" @@ -234,8 +235,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask, u32 attribute) { - return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<std::size_t>(size), - mask, attribute); + return SetMemoryAttribute(system, address, size, mask, attribute); } /// Maps a memory range into a different range. 
@@ -255,8 +255,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr } static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { - return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), - static_cast<std::size_t>(size)); + return MapMemory(system, dst_addr, src_addr, size); } /// Unmaps a region that was previously mapped with svcMapMemory @@ -276,8 +275,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad } static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { - return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), - static_cast<std::size_t>(size)); + return UnmapMemory(system, dst_addr, src_addr, size); } /// Connect to an OS service given the port name, returns the handle to the port to out @@ -332,7 +330,8 @@ static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle, /// Makes a blocking IPC call to an OS service. 
static ResultCode SendSyncRequest(Core::System& system, Handle handle) { - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + auto& kernel = system.Kernel(); + const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); if (!session) { LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); @@ -341,9 +340,9 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); - auto thread = system.CurrentScheduler().GetCurrentThread(); + auto thread = kernel.CurrentScheduler()->GetCurrentThread(); { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(kernel); thread->InvalidateHLECallback(); thread->SetStatus(ThreadStatus::WaitIPC); session->SendSyncRequest(SharedFrom(thread), system.Memory(), system.CoreTiming()); @@ -352,12 +351,12 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { if (thread->HasHLECallback()) { Handle event_handle = thread->GetHLETimeEvent(); if (event_handle != InvalidHandle) { - auto& time_manager = system.Kernel().TimeManager(); + auto& time_manager = kernel.TimeManager(); time_manager.UnscheduleTimeEvent(event_handle); } { - SchedulerLock lock(system.Kernel()); + KScopedSchedulerLock lock(kernel); auto* sync_object = thread->GetHLESyncObject(); sync_object->RemoveWaitingThread(SharedFrom(thread)); } @@ -531,8 +530,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle, u32 mutex_addr, Handle requesting_thread_handle) { - return ArbitrateLock(system, holding_thread_handle, static_cast<VAddr>(mutex_addr), - requesting_thread_handle); + return ArbitrateLock(system, holding_thread_handle, mutex_addr, requesting_thread_handle); } /// Unlock a mutex @@ -555,7 +553,7 @@ 
static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) { } static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) { - return ArbitrateUnlock(system, static_cast<VAddr>(mutex_addr)); + return ArbitrateUnlock(system, mutex_addr); } enum class BreakType : u32 { @@ -658,7 +656,6 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt); if (!break_reason.signal_debugger) { - SchedulerLock lock(system.Kernel()); LOG_CRITICAL( Debug_Emulated, "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", @@ -666,18 +663,14 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) { handle_debug_buffer(info1, info2); - auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); + auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); const auto thread_processor_id = current_thread->GetProcessorID(); system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); - - // Kill the current thread - system.Kernel().ExceptionalExit(); - current_thread->Stop(); } } static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) { - Break(system, reason, static_cast<u64>(info1), static_cast<u64>(info2)); + Break(system, reason, info1, info2); } /// Used to output a message on a debug hardware unit - does nothing on a retail unit @@ -922,7 +915,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha } const auto& core_timing = system.CoreTiming(); - const auto& scheduler = system.CurrentScheduler(); + const auto& scheduler = *system.Kernel().CurrentScheduler(); const auto* const current_thread = scheduler.GetCurrentThread(); const bool same_thread = current_thread == thread.get(); @@ -948,7 +941,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha static ResultCode 
GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low, u32 info_id, u32 handle, u32 sub_id_high) { - const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))}; + const u64 sub_id{u64{sub_id_low} | (u64{sub_id_high} << 32)}; u64 res_value{}; const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)}; @@ -1009,7 +1002,7 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) } static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { - return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); + return MapPhysicalMemory(system, addr, size); } /// Unmaps memory previously mapped via MapPhysicalMemory @@ -1063,7 +1056,7 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size } static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { - return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); + return UnmapPhysicalMemory(system, addr, size); } /// Sets the thread activity @@ -1090,7 +1083,7 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act return ERR_INVALID_HANDLE; } - if (thread.get() == system.CurrentScheduler().GetCurrentThread()) { + if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) { LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); return ERR_BUSY; } @@ -1123,7 +1116,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H return ERR_INVALID_HANDLE; } - if (thread.get() == system.CurrentScheduler().GetCurrentThread()) { + if (thread.get() == system.Kernel().CurrentScheduler()->GetCurrentThread()) { LOG_ERROR(Kernel_SVC, "The thread handle specified is the current running thread"); return ERR_BUSY; } @@ -1144,7 +1137,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr 
thread_context, H } static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) { - return GetThreadContext(system, static_cast<VAddr>(thread_context), handle); + return GetThreadContext(system, thread_context, handle); } /// Gets the priority for the specified thread @@ -1281,8 +1274,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr, u32 size, u32 permissions) { - return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr), - static_cast<std::size_t>(size), permissions); + return MapSharedMemory(system, shared_memory_handle, addr, size, permissions); } static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address, @@ -1480,7 +1472,7 @@ static void ExitProcess(Core::System& system) { current_process->PrepareForTermination(); // Kill the current thread - system.CurrentScheduler().GetCurrentThread()->Stop(); + system.Kernel().CurrentScheduler()->GetCurrentThread()->Stop(); } static void ExitProcess32(Core::System& system) { @@ -1552,8 +1544,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority, u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) { - return CreateThread(system, out_handle, static_cast<VAddr>(entry_point), static_cast<u64>(arg), - static_cast<VAddr>(stack_top), priority, processor_id); + return CreateThread(system, out_handle, entry_point, arg, stack_top, priority, processor_id); } /// Starts the thread for the provided handle @@ -1581,8 +1572,8 @@ static ResultCode StartThread32(Core::System& system, Handle thread_handle) { static void ExitThread(Core::System& system) { LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); - auto* const current_thread = 
system.CurrentScheduler().GetCurrentThread(); - system.GlobalScheduler().RemoveThread(SharedFrom(current_thread)); + auto* const current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); + system.GlobalSchedulerContext().RemoveThread(SharedFrom(current_thread)); current_thread->Stop(); } @@ -1592,53 +1583,39 @@ static void ExitThread32(Core::System& system) { /// Sleep the current thread static void SleepThread(Core::System& system, s64 nanoseconds) { - LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds); + LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); enum class SleepType : s64 { - YieldWithoutLoadBalancing = 0, - YieldWithLoadBalancing = -1, + YieldWithoutCoreMigration = 0, + YieldWithCoreMigration = -1, YieldAndWaitForLoadBalancing = -2, }; - auto& scheduler = system.CurrentScheduler(); - auto* const current_thread = scheduler.GetCurrentThread(); - bool is_redundant = false; - + auto& scheduler = *system.Kernel().CurrentScheduler(); if (nanoseconds <= 0) { switch (static_cast<SleepType>(nanoseconds)) { - case SleepType::YieldWithoutLoadBalancing: { - auto pair = current_thread->YieldSimple(); - is_redundant = pair.second; + case SleepType::YieldWithoutCoreMigration: { + scheduler.YieldWithoutCoreMigration(); break; } - case SleepType::YieldWithLoadBalancing: { - auto pair = current_thread->YieldAndBalanceLoad(); - is_redundant = pair.second; + case SleepType::YieldWithCoreMigration: { + scheduler.YieldWithCoreMigration(); break; } case SleepType::YieldAndWaitForLoadBalancing: { - auto pair = current_thread->YieldAndWaitForLoadBalancing(); - is_redundant = pair.second; + scheduler.YieldToAnyThread(); break; } default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); } } else { - current_thread->Sleep(nanoseconds); - } - - if (is_redundant && !system.Kernel().IsMulticore()) { - system.Kernel().ExitSVCProfile(); - system.CoreTiming().AddTicks(1000U); - system.GetCpuManager().PreemptSingleCore(); - 
system.Kernel().EnterSVCProfile(); + scheduler.GetCurrentThread()->Sleep(nanoseconds); } } static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) { - const s64 nanoseconds = static_cast<s64>(static_cast<u64>(nanoseconds_low) | - (static_cast<u64>(nanoseconds_high) << 32)); + const auto nanoseconds = static_cast<s64>(u64{nanoseconds_low} | (u64{nanoseconds_high} << 32)); SleepThread(system, nanoseconds); } @@ -1668,10 +1645,10 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); auto& kernel = system.Kernel(); Handle event_handle; - Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); - auto* const current_process = system.Kernel().CurrentProcess(); + Thread* current_thread = kernel.CurrentScheduler()->GetCurrentThread(); + auto* const current_process = kernel.CurrentProcess(); { - SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); + KScopedSchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); const auto& handle_table = current_process->GetHandleTable(); std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); ASSERT(thread); @@ -1707,7 +1684,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add } { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); auto* owner = current_thread->GetLockOwner(); if (owner != nullptr) { @@ -1724,10 +1701,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr, u32 condition_variable_addr, Handle thread_handle, u32 nanoseconds_low, u32 nanoseconds_high) { - const s64 nanoseconds = - static_cast<s64>(nanoseconds_low | (static_cast<u64>(nanoseconds_high) << 32)); - return WaitProcessWideKeyAtomic(system, static_cast<VAddr>(mutex_addr), - 
static_cast<VAddr>(condition_variable_addr), thread_handle, + const auto nanoseconds = static_cast<s64>(nanoseconds_low | (u64{nanoseconds_high} << 32)); + return WaitProcessWideKeyAtomic(system, mutex_addr, condition_variable_addr, thread_handle, nanoseconds); } @@ -1740,7 +1715,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ // Retrieve a list of all threads that are waiting for this condition variable. auto& kernel = system.Kernel(); - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); auto* const current_process = kernel.CurrentProcess(); std::vector<std::shared_ptr<Thread>> waiting_threads = current_process->GetConditionVariableThreads(condition_variable_addr); @@ -1833,8 +1808,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value, u32 timeout_low, u32 timeout_high) { - s64 timeout = static_cast<s64>(timeout_low | (static_cast<u64>(timeout_high) << 32)); - return WaitForAddress(system, static_cast<VAddr>(address), type, value, timeout); + const auto timeout = static_cast<s64>(timeout_low | (u64{timeout_high} << 32)); + return WaitForAddress(system, address, type, value, timeout); } // Signals to an address (via Address Arbiter) @@ -1862,7 +1837,7 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value, s32 num_to_wake) { - return SignalToAddress(system, static_cast<VAddr>(address), type, value, num_to_wake); + return SignalToAddress(system, address, type, value, num_to_wake); } static void KernelDebug([[maybe_unused]] Core::System& system, @@ -1893,7 +1868,7 @@ static u64 GetSystemTick(Core::System& system) { } static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) { - u64 time = GetSystemTick(system); + const auto time = GetSystemTick(system); 
*time_low = static_cast<u32>(time); *time_high = static_cast<u32>(time >> 32); } @@ -1984,8 +1959,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size, u32 permissions) { - return CreateTransferMemory(system, handle, static_cast<VAddr>(addr), - static_cast<std::size_t>(size), permissions); + return CreateTransferMemory(system, handle, addr, size, permissions); } static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core, @@ -2003,7 +1977,7 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, } *core = thread->GetIdealCore(); - *mask = thread->GetAffinityMask(); + *mask = thread->GetAffinityMask().GetAffinityMask(); return RESULT_SUCCESS; } @@ -2075,8 +2049,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core, u32 affinity_mask_low, u32 affinity_mask_high) { - const u64 affinity_mask = - static_cast<u64>(affinity_mask_low) | (static_cast<u64>(affinity_mask_high) << 32); + const auto affinity_mask = u64{affinity_mask_low} | (u64{affinity_mask_high} << 32); return SetThreadCoreMask(system, thread_handle, core, affinity_mask); } @@ -2341,9 +2314,10 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd return RESULT_SUCCESS; } -static ResultCode FlushProcessDataCache32(Core::System& system, Handle handle, u32 address, - u32 size) { - // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a nope +static ResultCode FlushProcessDataCache32([[maybe_unused]] Core::System& system, + [[maybe_unused]] Handle handle, + [[maybe_unused]] u32 address, [[maybe_unused]] u32 size) { + // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a no-op, // as all emulation is done in the same cache level in 
host architecture, thus data cache // does not need flushing. LOG_DEBUG(Kernel_SVC, "called"); @@ -2639,7 +2613,7 @@ void Call(Core::System& system, u32 immediate) { auto& kernel = system.Kernel(); kernel.EnterSVCProfile(); - auto* thread = system.CurrentScheduler().GetCurrentThread(); + auto* thread = kernel.CurrentScheduler()->GetCurrentThread(); thread->SetContinuousOnSVC(true); const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate) diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp index 8b875d853..d3f520ea2 100644 --- a/src/core/hle/kernel/synchronization.cpp +++ b/src/core/hle/kernel/synchronization.cpp @@ -5,8 +5,9 @@ #include "core/core.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/synchronization.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" @@ -18,7 +19,7 @@ Synchronization::Synchronization(Core::System& system) : system{system} {} void Synchronization::SignalObject(SynchronizationObject& obj) const { auto& kernel = system.Kernel(); - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (obj.IsSignaled()) { for (auto thread : obj.GetWaitingThreads()) { if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) { @@ -37,10 +38,10 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const { std::pair<ResultCode, Handle> Synchronization::WaitFor( std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { auto& kernel = system.Kernel(); - auto* const thread = system.CurrentScheduler().GetCurrentThread(); + auto* const thread = kernel.CurrentScheduler()->GetCurrentThread(); Handle event_handle = InvalidHandle; { - SchedulerLockAndSleep 
lock(kernel, event_handle, thread, nano_seconds); + KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds); const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), [thread](const std::shared_ptr<SynchronizationObject>& object) { @@ -89,7 +90,7 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor( } { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); ResultCode signaling_result = thread->GetSignalingResult(); SynchronizationObject* signaling_object = thread->GetSignalingObject(); thread->SetSynchronizationObjects(nullptr); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 7d1eb2c6e..a4f9e0d97 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -17,10 +17,11 @@ #include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" @@ -50,7 +51,7 @@ Thread::~Thread() = default; void Thread::Stop() { { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); SetStatus(ThreadStatus::Dead); Signal(); kernel.GlobalHandleTable().Close(global_handle); @@ -67,7 +68,7 @@ void Thread::Stop() { } void Thread::ResumeFromWait() { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); switch (status) { case ThreadStatus::Paused: case ThreadStatus::WaitSynch: @@ -99,19 +100,18 @@ void Thread::ResumeFromWait() { } void Thread::OnWakeUp() { - SchedulerLock lock(kernel); - + KScopedSchedulerLock lock(kernel); SetStatus(ThreadStatus::Ready); } ResultCode Thread::Start() { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); 
SetStatus(ThreadStatus::Ready); return RESULT_SUCCESS; } void Thread::CancelWait() { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) { is_sync_cancelled = true; return; @@ -186,12 +186,14 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy thread->status = ThreadStatus::Dormant; thread->entry_point = entry_point; thread->stack_top = stack_top; + thread->disable_count = 1; thread->tpidr_el0 = 0; thread->nominal_priority = thread->current_priority = priority; - thread->last_running_ticks = 0; + thread->schedule_count = -1; + thread->last_scheduled_tick = 0; thread->processor_id = processor_id; thread->ideal_core = processor_id; - thread->affinity_mask = 1ULL << processor_id; + thread->affinity_mask.SetAffinity(processor_id, true); thread->wait_objects = nullptr; thread->mutex_wait_address = 0; thread->condvar_wait_address = 0; @@ -201,7 +203,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy thread->owner_process = owner_process; thread->type = type_flags; if ((type_flags & THREADTYPE_IDLE) == 0) { - auto& scheduler = kernel.GlobalScheduler(); + auto& scheduler = kernel.GlobalSchedulerContext(); scheduler.AddThread(thread); } if (owner_process) { @@ -225,7 +227,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy } void Thread::SetPriority(u32 priority) { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST, "Invalid priority value."); nominal_priority = priority; @@ -362,7 +364,7 @@ bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) { } ResultCode Thread::SetActivity(ThreadActivity value) { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); auto sched_status = GetSchedulingStatus(); @@ -391,7 +393,7 @@ ResultCode Thread::SetActivity(ThreadActivity value) { 
ResultCode Thread::Sleep(s64 nanoseconds) { Handle event_handle{}; { - SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); + KScopedSchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); SetStatus(ThreadStatus::WaitSleep); } @@ -402,39 +404,12 @@ ResultCode Thread::Sleep(s64 nanoseconds) { return RESULT_SUCCESS; } -std::pair<ResultCode, bool> Thread::YieldSimple() { - bool is_redundant = false; - { - SchedulerLock lock(kernel); - is_redundant = kernel.GlobalScheduler().YieldThread(this); - } - return {RESULT_SUCCESS, is_redundant}; -} - -std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() { - bool is_redundant = false; - { - SchedulerLock lock(kernel); - is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this); - } - return {RESULT_SUCCESS, is_redundant}; -} - -std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() { - bool is_redundant = false; - { - SchedulerLock lock(kernel); - is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this); - } - return {RESULT_SUCCESS, is_redundant}; -} - void Thread::AddSchedulingFlag(ThreadSchedFlags flag) { const u32 old_state = scheduling_state; pausing_state |= static_cast<u32>(flag); const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); scheduling_state = base_scheduling | pausing_state; - kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); + KScheduler::OnThreadStateChanged(kernel, this, old_state); } void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { @@ -442,23 +417,24 @@ void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { pausing_state &= ~static_cast<u32>(flag); const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); scheduling_state = base_scheduling | pausing_state; - kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); + KScheduler::OnThreadStateChanged(kernel, this, old_state); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { const u32 
old_state = scheduling_state; scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) | static_cast<u32>(new_status); - kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); + KScheduler::OnThreadStateChanged(kernel, this, old_state); } void Thread::SetCurrentPriority(u32 new_priority) { const u32 old_priority = std::exchange(current_priority, new_priority); - kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority); + KScheduler::OnThreadPriorityChanged(kernel, this, kernel.CurrentScheduler()->GetCurrentThread(), + old_priority); } ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { - SchedulerLock lock(kernel); + KScopedSchedulerLock lock(kernel); const auto HighestSetCore = [](u64 mask, u32 max_cores) { for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) { if (((mask >> core) & 1) != 0) { @@ -479,20 +455,21 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { } if (use_override) { ideal_core_override = new_core; - affinity_mask_override = new_affinity_mask; } else { - const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask); + const auto old_affinity_mask = affinity_mask; + affinity_mask.SetAffinityMask(new_affinity_mask); ideal_core = new_core; - if (old_affinity_mask != new_affinity_mask) { + if (old_affinity_mask.GetAffinityMask() != new_affinity_mask) { const s32 old_core = processor_id; - if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { + if (processor_id >= 0 && !affinity_mask.GetAffinity(processor_id)) { if (static_cast<s32>(ideal_core) < 0) { - processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES); + processor_id = HighestSetCore(affinity_mask.GetAffinityMask(), + Core::Hardware::NUM_CPU_CORES); } else { processor_id = ideal_core; } } - kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core); + 
KScheduler::OnThreadAffinityMaskChanged(kernel, this, old_affinity_mask, old_core); } } return RESULT_SUCCESS; diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index a75071e9b..11ef29888 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -4,6 +4,7 @@ #pragma once +#include <array> #include <functional> #include <string> #include <utility> @@ -12,6 +13,7 @@ #include "common/common_types.h" #include "common/spin_lock.h" #include "core/arm/arm_interface.h" +#include "core/hle/kernel/k_affinity_mask.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" @@ -27,10 +29,10 @@ class System; namespace Kernel { -class GlobalScheduler; +class GlobalSchedulerContext; class KernelCore; class Process; -class Scheduler; +class KScheduler; enum ThreadPriority : u32 { THREADPRIO_HIGHEST = 0, ///< Highest thread priority @@ -345,8 +347,12 @@ public: void SetStatus(ThreadStatus new_status); - u64 GetLastRunningTicks() const { - return last_running_ticks; + s64 GetLastScheduledTick() const { + return this->last_scheduled_tick; + } + + void SetLastScheduledTick(s64 tick) { + this->last_scheduled_tick = tick; } u64 GetTotalCPUTimeTicks() const { @@ -361,10 +367,18 @@ public: return processor_id; } + s32 GetActiveCore() const { + return GetProcessorID(); + } + void SetProcessorID(s32 new_core) { processor_id = new_core; } + void SetActiveCore(s32 new_core) { + processor_id = new_core; + } + Process* GetOwnerProcess() { return owner_process; } @@ -469,7 +483,7 @@ public: return ideal_core; } - u64 GetAffinityMask() const { + const KAffinityMask& GetAffinityMask() const { return affinity_mask; } @@ -478,21 +492,12 @@ public: /// Sleeps this thread for the given amount of nanoseconds. ResultCode Sleep(s64 nanoseconds); - /// Yields this thread without rebalancing loads. - std::pair<ResultCode, bool> YieldSimple(); - - /// Yields this thread and does a load rebalancing. 
- std::pair<ResultCode, bool> YieldAndBalanceLoad(); - - /// Yields this thread and if the core is left idle, loads are rebalanced - std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing(); - - void IncrementYieldCount() { - yield_count++; + s64 GetYieldScheduleCount() const { + return this->schedule_count; } - u64 GetYieldCount() const { - return yield_count; + void SetYieldScheduleCount(s64 count) { + this->schedule_count = count; } ThreadSchedStatus GetSchedulingStatus() const { @@ -568,9 +573,59 @@ public: return has_exited; } + class QueueEntry { + public: + constexpr QueueEntry() = default; + + constexpr void Initialize() { + this->prev = nullptr; + this->next = nullptr; + } + + constexpr Thread* GetPrev() const { + return this->prev; + } + constexpr Thread* GetNext() const { + return this->next; + } + constexpr void SetPrev(Thread* thread) { + this->prev = thread; + } + constexpr void SetNext(Thread* thread) { + this->next = thread; + } + + private: + Thread* prev{}; + Thread* next{}; + }; + + QueueEntry& GetPriorityQueueEntry(s32 core) { + return this->per_core_priority_queue_entry[core]; + } + + const QueueEntry& GetPriorityQueueEntry(s32 core) const { + return this->per_core_priority_queue_entry[core]; + } + + s32 GetDisableDispatchCount() const { + return disable_count; + } + + void DisableDispatch() { + ASSERT(GetDisableDispatchCount() >= 0); + disable_count++; + } + + void EnableDispatch() { + ASSERT(GetDisableDispatchCount() > 0); + disable_count--; + } + private: - friend class GlobalScheduler; - friend class Scheduler; + friend class GlobalSchedulerContext; + friend class KScheduler; + friend class Process; void SetSchedulingStatus(ThreadSchedStatus new_status); void AddSchedulingFlag(ThreadSchedFlags flag); @@ -583,12 +638,14 @@ private: ThreadContext64 context_64{}; std::shared_ptr<Common::Fiber> host_context{}; - u64 thread_id = 0; - ThreadStatus status = ThreadStatus::Dormant; + u32 scheduling_state = 0; + + u64 thread_id = 0; VAddr 
entry_point = 0; VAddr stack_top = 0; + std::atomic_int disable_count = 0; ThreadType type; @@ -602,9 +659,8 @@ private: u32 current_priority = 0; u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. - u64 last_running_ticks = 0; ///< CPU tick when thread was last running - u64 yield_count = 0; ///< Number of redundant yields carried by this thread. - ///< a redundant yield is one where no scheduling is changed + s64 schedule_count{}; + s64 last_scheduled_tick{}; s32 processor_id = 0; @@ -646,16 +702,16 @@ private: Handle hle_time_event; SynchronizationObject* hle_object; - Scheduler* scheduler = nullptr; + KScheduler* scheduler = nullptr; + + std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> per_core_priority_queue_entry{}; u32 ideal_core{0xFFFFFFFF}; - u64 affinity_mask{0x1}; + KAffinityMask affinity_mask{}; s32 ideal_core_override = -1; - u64 affinity_mask_override = 0x1; u32 affinity_override_count = 0; - u32 scheduling_state = 0; u32 pausing_state = 0; bool is_running = false; bool is_waiting_on_sync = false; diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index caf329bfb..79628e2b4 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -7,8 +7,8 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/time_manager.h" @@ -18,12 +18,18 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} { time_manager_event_type = Core::Timing::CreateEvent( "Kernel::TimeManagerCallback", [this](std::uintptr_t thread_handle, std::chrono::nanoseconds) { - const SchedulerLock lock(system.Kernel()); + const KScopedSchedulerLock lock(system.Kernel()); const auto proper_handle = static_cast<Handle>(thread_handle); - if 
(cancelled_events[proper_handle]) { - return; + + std::shared_ptr<Thread> thread; + { + std::lock_guard lock{mutex}; + if (cancelled_events[proper_handle]) { + return; + } + thread = system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); } - auto thread = this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); + if (thread) { // Thread can be null if process has exited thread->OnWakeUp(); @@ -56,6 +62,7 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) { } void TimeManager::CancelTimeEvent(Thread* time_task) { + std::lock_guard lock{mutex}; const Handle event_handle = time_task->GetGlobalHandle(); UnscheduleTimeEvent(event_handle); } diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 38d877f6e..cb13210e5 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1092,14 +1092,14 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx) const auto applet_id = rp.PopRaw<Applets::AppletId>(); const auto applet_mode = rp.PopRaw<u32>(); - LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", - static_cast<u32>(applet_id), applet_mode); + LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", applet_id, + applet_mode); const auto& applet_manager{system.GetAppletManager()}; const auto applet = applet_manager.GetApplet(applet_id); if (applet == nullptr) { - LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); + LOG_ERROR(Service_AM, "Applet doesn't exist! 
applet_id={}", applet_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_UNKNOWN); @@ -1290,7 +1290,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto kind = rp.PopEnum<LaunchParameterKind>(); - LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); + LOG_DEBUG(Service_AM, "called, kind={:08X}", kind); if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { @@ -1537,8 +1537,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto [type, user_id] = rp.PopRaw<Parameters>(); - LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), - user_id[1], user_id[0]); + LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", type, user_id[1], + user_id[0]); const auto size = system.GetFileSystemController().ReadSaveDataSize( type, system.CurrentProcess()->GetTitleID(), user_id); diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index 2b626bb40..08676c3fc 100644 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp @@ -142,14 +142,14 @@ void Applet::Initialize() { AppletFrontendSet::AppletFrontendSet() = default; -AppletFrontendSet::AppletFrontendSet(ControllerApplet controller, ECommerceApplet e_commerce, - ErrorApplet error, ParentalControlsApplet parental_controls, - PhotoViewer photo_viewer, ProfileSelect profile_select, - SoftwareKeyboard software_keyboard, WebBrowser web_browser) - : controller{std::move(controller)}, e_commerce{std::move(e_commerce)}, error{std::move(error)}, - parental_controls{std::move(parental_controls)}, photo_viewer{std::move(photo_viewer)}, - profile_select{std::move(profile_select)}, 
software_keyboard{std::move(software_keyboard)}, - web_browser{std::move(web_browser)} {} +AppletFrontendSet::AppletFrontendSet(ControllerApplet controller_applet, ErrorApplet error_applet, + ParentalControlsApplet parental_controls_applet, + PhotoViewer photo_viewer_, ProfileSelect profile_select_, + SoftwareKeyboard software_keyboard_, WebBrowser web_browser_) + : controller{std::move(controller_applet)}, error{std::move(error_applet)}, + parental_controls{std::move(parental_controls_applet)}, + photo_viewer{std::move(photo_viewer_)}, profile_select{std::move(profile_select_)}, + software_keyboard{std::move(software_keyboard_)}, web_browser{std::move(web_browser_)} {} AppletFrontendSet::~AppletFrontendSet() = default; @@ -170,10 +170,6 @@ void AppletManager::SetAppletFrontendSet(AppletFrontendSet set) { frontend.controller = std::move(set.controller); } - if (set.e_commerce != nullptr) { - frontend.e_commerce = std::move(set.e_commerce); - } - if (set.error != nullptr) { frontend.error = std::move(set.error); } @@ -210,10 +206,6 @@ void AppletManager::SetDefaultAppletsIfMissing() { std::make_unique<Core::Frontend::DefaultControllerApplet>(system.ServiceManager()); } - if (frontend.e_commerce == nullptr) { - frontend.e_commerce = std::make_unique<Core::Frontend::DefaultECommerceApplet>(); - } - if (frontend.error == nullptr) { frontend.error = std::make_unique<Core::Frontend::DefaultErrorApplet>(); } @@ -257,13 +249,14 @@ std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const { return std::make_shared<ProfileSelect>(system, *frontend.profile_select); case AppletId::SoftwareKeyboard: return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard); + case AppletId::Web: + case AppletId::Shop: + case AppletId::OfflineWeb: + case AppletId::LoginShare: + case AppletId::WebAuth: + return std::make_shared<WebBrowser>(system, *frontend.web_browser); case AppletId::PhotoViewer: return std::make_shared<PhotoViewer>(system, 
*frontend.photo_viewer); - case AppletId::LibAppletShop: - return std::make_shared<WebBrowser>(system, *frontend.web_browser, - frontend.e_commerce.get()); - case AppletId::LibAppletOff: - return std::make_shared<WebBrowser>(system, *frontend.web_browser); default: UNIMPLEMENTED_MSG( "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index a1f4cf897..4fd792c05 100644 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -50,13 +50,13 @@ enum class AppletId : u32 { ProfileSelect = 0x10, SoftwareKeyboard = 0x11, MiiEdit = 0x12, - LibAppletWeb = 0x13, - LibAppletShop = 0x14, + Web = 0x13, + Shop = 0x14, PhotoViewer = 0x15, Settings = 0x16, - LibAppletOff = 0x17, - LibAppletWhitelisted = 0x18, - LibAppletAuth = 0x19, + OfflineWeb = 0x17, + LoginShare = 0x18, + WebAuth = 0x19, MyPage = 0x1A, }; @@ -157,7 +157,6 @@ protected: struct AppletFrontendSet { using ControllerApplet = std::unique_ptr<Core::Frontend::ControllerApplet>; - using ECommerceApplet = std::unique_ptr<Core::Frontend::ECommerceApplet>; using ErrorApplet = std::unique_ptr<Core::Frontend::ErrorApplet>; using ParentalControlsApplet = std::unique_ptr<Core::Frontend::ParentalControlsApplet>; using PhotoViewer = std::unique_ptr<Core::Frontend::PhotoViewerApplet>; @@ -166,10 +165,10 @@ struct AppletFrontendSet { using WebBrowser = std::unique_ptr<Core::Frontend::WebBrowserApplet>; AppletFrontendSet(); - AppletFrontendSet(ControllerApplet controller, ECommerceApplet e_commerce, ErrorApplet error, - ParentalControlsApplet parental_controls, PhotoViewer photo_viewer, - ProfileSelect profile_select, SoftwareKeyboard software_keyboard, - WebBrowser web_browser); + AppletFrontendSet(ControllerApplet controller_applet, ErrorApplet error_applet, + ParentalControlsApplet parental_controls_applet, PhotoViewer photo_viewer_, + ProfileSelect 
profile_select_, SoftwareKeyboard software_keyboard_, + WebBrowser web_browser_); ~AppletFrontendSet(); AppletFrontendSet(const AppletFrontendSet&) = delete; @@ -179,7 +178,6 @@ struct AppletFrontendSet { AppletFrontendSet& operator=(AppletFrontendSet&&) noexcept; ControllerApplet controller; - ECommerceApplet e_commerce; ErrorApplet error; ParentalControlsApplet parental_controls; PhotoViewer photo_viewer; diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp index e8ea4248b..7edfca64e 100644 --- a/src/core/hle/service/am/applets/controller.cpp +++ b/src/core/hle/service/am/applets/controller.cpp @@ -29,14 +29,14 @@ static Core::Frontend::ControllerParameters ConvertToFrontendParameters( npad_style_set.raw = private_arg.style_set; return { - .min_players = std::max(s8(1), header.player_count_min), + .min_players = std::max(s8{1}, header.player_count_min), .max_players = header.player_count_max, .keep_controllers_connected = header.enable_take_over_connection, .enable_single_mode = header.enable_single_mode, .enable_border_color = header.enable_identification_color, - .border_colors = identification_colors, + .border_colors = std::move(identification_colors), .enable_explain_text = enable_text, - .explain_text = text, + .explain_text = std::move(text), .allow_pro_controller = npad_style_set.pro_controller == 1, .allow_handheld = npad_style_set.handheld == 1, .allow_dual_joycons = npad_style_set.joycon_dual == 1, @@ -227,15 +227,14 @@ void Controller::ConfigurationComplete() { // If enable_single_mode is enabled, player_count is 1 regardless of any other parameters. // Otherwise, only count connected players from P1-P8. result_info.player_count = - is_single_mode ? 
1 - : static_cast<s8>(std::count_if( - players.begin(), players.end() - 2, - [](Settings::PlayerInput player) { return player.connected; })); - - result_info.selected_id = HID::Controller_NPad::IndexToNPad( - std::distance(players.begin(), - std::find_if(players.begin(), players.end(), - [](Settings::PlayerInput player) { return player.connected; }))); + is_single_mode + ? 1 + : static_cast<s8>(std::count_if(players.begin(), players.end() - 2, + [](const auto& player) { return player.connected; })); + + result_info.selected_id = HID::Controller_NPad::IndexToNPad(std::distance( + players.begin(), std::find_if(players.begin(), players.end(), + [](const auto& player) { return player.connected; }))); result_info.result = 0; diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp index dcd4b2a35..d85505082 100644 --- a/src/core/hle/service/am/applets/error.cpp +++ b/src/core/hle/service/am/applets/error.cpp @@ -125,7 +125,7 @@ void Error::Initialize() { error_code = Decode64BitError(args->error_record.error_code_64); break; default: - UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); + UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode); } } @@ -179,7 +179,7 @@ void Error::Execute() { error_code, std::chrono::seconds{args->error_record.posix_time}, callback); break; default: - UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", static_cast<u8>(mode)); + UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode); DisplayCompleted(); } } diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp index bdb6fd464..4d1df5cbe 100644 --- a/src/core/hle/service/am/applets/general_backend.cpp +++ b/src/core/hle/service/am/applets/general_backend.cpp @@ -90,7 +90,7 @@ void Auth::Execute() { const auto unimplemented_log = [this] { UNIMPLEMENTED_MSG("Unimplemented Auth applet type for type={:08X}, 
arg0={:02X}, " "arg1={:02X}, arg2={:02X}", - static_cast<u32>(type), arg0, arg1, arg2); + type, arg0, arg1, arg2); }; switch (type) { @@ -136,7 +136,7 @@ void Auth::Execute() { } void Auth::AuthFinished(bool is_successful) { - this->successful = is_successful; + successful = is_successful; struct Return { ResultCode result_code; @@ -193,7 +193,7 @@ void PhotoViewer::Execute() { frontend.ShowAllPhotos(callback); break; default: - UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", static_cast<u8>(mode)); + UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", mode); } } diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index c3b6b706a..2ab420789 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp @@ -1,558 +1,478 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <array> -#include <cstring> -#include <vector> - #include "common/assert.h" -#include "common/common_funcs.h" #include "common/common_paths.h" #include "common/file_util.h" -#include "common/hex_util.h" #include "common/logging/log.h" #include "common/string_util.h" #include "core/core.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/mode.h" #include "core/file_sys/nca_metadata.h" +#include "core/file_sys/patch_manager.h" #include "core/file_sys/registered_cache.h" #include "core/file_sys/romfs.h" #include "core/file_sys/system_archive/system_archive.h" -#include "core/file_sys/vfs_types.h" -#include "core/frontend/applets/general_frontend.h" +#include "core/file_sys/vfs_vector.h" #include "core/frontend/applets/web_browser.h" #include "core/hle/kernel/process.h" +#include "core/hle/result.h" +#include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/web_browser.h" #include "core/hle/service/filesystem/filesystem.h" -#include "core/loader/loader.h" +#include "core/hle/service/ns/pl_u.h" namespace Service::AM::Applets { -enum class WebArgTLVType : u16 { - InitialURL = 0x1, - ShopArgumentsURL = 0x2, ///< TODO(DarkLordZach): This is not the official name. 
- CallbackURL = 0x3, - CallbackableURL = 0x4, - ApplicationID = 0x5, - DocumentPath = 0x6, - DocumentKind = 0x7, - SystemDataID = 0x8, - ShareStartPage = 0x9, - Whitelist = 0xA, - News = 0xB, - UserID = 0xE, - AlbumEntry0 = 0xF, - ScreenShotEnabled = 0x10, - EcClientCertEnabled = 0x11, - Unk12 = 0x12, - PlayReportEnabled = 0x13, - Unk14 = 0x14, - Unk15 = 0x15, - BootDisplayKind = 0x17, - BackgroundKind = 0x18, - FooterEnabled = 0x19, - PointerEnabled = 0x1A, - LeftStickMode = 0x1B, - KeyRepeatFrame1 = 0x1C, - KeyRepeatFrame2 = 0x1D, - BootAsMediaPlayerInv = 0x1E, - DisplayUrlKind = 0x1F, - BootAsMediaPlayer = 0x21, - ShopJumpEnabled = 0x22, - MediaAutoPlayEnabled = 0x23, - LobbyParameter = 0x24, - ApplicationAlbumEntry = 0x26, - JsExtensionEnabled = 0x27, - AdditionalCommentText = 0x28, - TouchEnabledOnContents = 0x29, - UserAgentAdditionalString = 0x2A, - AdditionalMediaData0 = 0x2B, - MediaPlayerAutoCloseEnabled = 0x2C, - PageCacheEnabled = 0x2D, - WebAudioEnabled = 0x2E, - Unk2F = 0x2F, - YouTubeVideoWhitelist = 0x31, - FooterFixedKind = 0x32, - PageFadeEnabled = 0x33, - MediaCreatorApplicationRatingAge = 0x34, - BootLoadingIconEnabled = 0x35, - PageScrollIndicationEnabled = 0x36, - MediaPlayerSpeedControlEnabled = 0x37, - AlbumEntry1 = 0x38, - AlbumEntry2 = 0x39, - AlbumEntry3 = 0x3A, - AdditionalMediaData1 = 0x3B, - AdditionalMediaData2 = 0x3C, - AdditionalMediaData3 = 0x3D, - BootFooterButton = 0x3E, - OverrideWebAudioVolume = 0x3F, - OverrideMediaAudioVolume = 0x40, - BootMode = 0x41, - WebSessionEnabled = 0x42, -}; - -enum class ShimKind : u32 { - Shop = 1, - Login = 2, - Offline = 3, - Share = 4, - Web = 5, - Wifi = 6, - Lobby = 7, -}; - -enum class ShopWebTarget { - ApplicationInfo, - AddOnContentList, - SubscriptionList, - ConsumableItemList, - Home, - Settings, -}; - namespace { -constexpr std::size_t SHIM_KIND_COUNT = 0x8; - -struct WebArgHeader { - u16 count; - INSERT_PADDING_BYTES(2); - ShimKind kind; -}; -static_assert(sizeof(WebArgHeader) == 0x8, 
"WebArgHeader has incorrect size."); - -struct WebArgTLV { - WebArgTLVType type; - u16 size; - u32 offset; -}; -static_assert(sizeof(WebArgTLV) == 0x8, "WebArgTLV has incorrect size."); - -struct WebCommonReturnValue { - u32 result_code; - INSERT_PADDING_BYTES(0x4); - std::array<char, 0x1000> last_url; - u64 last_url_size; -}; -static_assert(sizeof(WebCommonReturnValue) == 0x1010, "WebCommonReturnValue has incorrect size."); - -struct WebWifiPageArg { - INSERT_PADDING_BYTES(4); - std::array<char, 0x100> connection_test_url; - std::array<char, 0x400> initial_url; - std::array<u8, 0x10> nifm_network_uuid; - u32 nifm_requirement; -}; -static_assert(sizeof(WebWifiPageArg) == 0x518, "WebWifiPageArg has incorrect size."); - -struct WebWifiReturnValue { - INSERT_PADDING_BYTES(4); - u32 result; -}; -static_assert(sizeof(WebWifiReturnValue) == 0x8, "WebWifiReturnValue has incorrect size."); - -enum class OfflineWebSource : u32 { - OfflineHtmlPage = 0x1, - ApplicationLegalInformation = 0x2, - SystemDataPage = 0x3, -}; - -std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>& arg) { - if (arg.size() < sizeof(WebArgHeader)) - return {}; - - WebArgHeader header{}; - std::memcpy(&header, arg.data(), sizeof(WebArgHeader)); - - std::map<WebArgTLVType, std::vector<u8>> out; - u64 offset = sizeof(WebArgHeader); - for (std::size_t i = 0; i < header.count; ++i) { - if (arg.size() < (offset + sizeof(WebArgTLV))) - return out; +template <typename T> +void ParseRawValue(T& value, const std::vector<u8>& data) { + static_assert(std::is_trivially_copyable_v<T>, + "It's undefined behavior to use memcpy with non-trivially copyable objects"); + std::memcpy(&value, data.data(), data.size()); +} - WebArgTLV tlv{}; - std::memcpy(&tlv, arg.data() + offset, sizeof(WebArgTLV)); - offset += sizeof(WebArgTLV); +template <typename T> +T ParseRawValue(const std::vector<u8>& data) { + T value; + ParseRawValue(value, data); + return value; +} - offset += tlv.offset; - if 
(arg.size() < (offset + tlv.size)) - return out; +std::string ParseStringValue(const std::vector<u8>& data) { + return Common::StringFromFixedZeroTerminatedBuffer(reinterpret_cast<const char*>(data.data()), + data.size()); +} - std::vector<u8> data(tlv.size); - std::memcpy(data.data(), arg.data() + offset, tlv.size); - offset += tlv.size; +std::string GetMainURL(const std::string& url) { + const auto index = url.find('?'); - out.insert_or_assign(tlv.type, data); + if (index == std::string::npos) { + return url; } - return out; + return url.substr(0, index); } -FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id, - FileSys::ContentRecordType type) { - const auto& installed{system.GetContentProvider()}; - const auto res = installed.GetEntry(title_id, type); +WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) { + std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader)); - if (res != nullptr) { - return res->GetRomFS(); + if (web_arg.size() == sizeof(WebArgHeader)) { + return {}; } - if (type == FileSys::ContentRecordType::Data) { - return FileSys::SystemArchive::SynthesizeSystemArchive(title_id); + WebArgInputTLVMap input_tlv_map; + + u64 current_offset = sizeof(WebArgHeader); + + for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) { + if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) { + return input_tlv_map; + } + + WebArgInputTLV input_tlv; + std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV)); + + current_offset += sizeof(WebArgInputTLV); + + if (web_arg.size() < current_offset + input_tlv.arg_data_size) { + return input_tlv_map; + } + + std::vector<u8> data(input_tlv.arg_data_size); + std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size); + + current_offset += input_tlv.arg_data_size; + + input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data)); } - return nullptr; + return 
input_tlv_map; } -} // Anonymous namespace +FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id, + FileSys::ContentRecordType nca_type) { + if (nca_type == FileSys::ContentRecordType::Data) { + const auto nca = + system.GetFileSystemController().GetSystemNANDContents()->GetEntry(title_id, nca_type); + + if (nca == nullptr) { + LOG_ERROR(Service_AM, + "NCA of type={} with title_id={:016X} is not found in the System NAND!", + nca_type, title_id); + return FileSys::SystemArchive::SynthesizeSystemArchive(title_id); + } -WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, - Core::Frontend::ECommerceApplet* frontend_e_commerce_) - : Applet{system_.Kernel()}, frontend(frontend_), - frontend_e_commerce(frontend_e_commerce_), system{system_} {} + return nca->GetRomFS(); + } else { + const auto nca = system.GetContentProvider().GetEntry(title_id, nca_type); -WebBrowser::~WebBrowser() = default; + if (nca == nullptr) { + LOG_ERROR(Service_AM, + "NCA of type={} with title_id={:016X} is not found in the ContentProvider!", + nca_type, title_id); + return nullptr; + } -void WebBrowser::Initialize() { - Applet::Initialize(); + const FileSys::PatchManager pm{title_id, system.GetFileSystemController(), + system.GetContentProvider()}; - complete = false; - temporary_dir.clear(); - filename.clear(); - status = RESULT_SUCCESS; + return pm.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), nca_type); + } +} - const auto web_arg_storage = broker.PopNormalDataToApplet(); - ASSERT(web_arg_storage != nullptr); - const auto& web_arg = web_arg_storage->GetData(); +void ExtractSharedFonts(Core::System& system) { + static constexpr std::array<const char*, 7> DECRYPTED_SHARED_FONTS{ + "FontStandard.ttf", + "FontChineseSimplified.ttf", + "FontExtendedChineseSimplified.ttf", + "FontChineseTraditional.ttf", + "FontKorean.ttf", + "FontNintendoExtended.ttf", + "FontNintendoExtended2.ttf", + }; - ASSERT(web_arg.size() >= 0x8); - 
std::memcpy(&kind, web_arg.data() + 0x4, sizeof(ShimKind)); + for (std::size_t i = 0; i < NS::SHARED_FONTS.size(); ++i) { + const auto fonts_dir = Common::FS::SanitizePath( + fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)), + Common::FS::DirectorySeparator::PlatformDefault); - args = GetWebArguments(web_arg); + const auto font_file_path = + Common::FS::SanitizePath(fmt::format("{}/{}", fonts_dir, DECRYPTED_SHARED_FONTS[i]), + Common::FS::DirectorySeparator::PlatformDefault); - InitializeInternal(); -} + if (Common::FS::Exists(font_file_path)) { + continue; + } -bool WebBrowser::TransactionComplete() const { - return complete; -} + const auto font = NS::SHARED_FONTS[i]; + const auto font_title_id = static_cast<u64>(font.first); -ResultCode WebBrowser::GetStatus() const { - return status; -} + const auto nca = system.GetFileSystemController().GetSystemNANDContents()->GetEntry( + font_title_id, FileSys::ContentRecordType::Data); -void WebBrowser::ExecuteInteractive() { - UNIMPLEMENTED_MSG("Unexpected interactive data recieved!"); -} + FileSys::VirtualFile romfs; -void WebBrowser::Execute() { - if (complete) { - return; - } + if (!nca) { + romfs = FileSys::SystemArchive::SynthesizeSystemArchive(font_title_id); + } else { + romfs = nca->GetRomFS(); + } - if (status != RESULT_SUCCESS) { - complete = true; + if (!romfs) { + LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} cannot be extracted!", + font_title_id); + continue; + } - // This is a workaround in order not to softlock yuzu when an error happens during the - // webapplet init. 
In order to avoid an svcBreak, the status is set to RESULT_SUCCESS - Finalize(); - status = RESULT_SUCCESS; + const auto extracted_romfs = FileSys::ExtractRomFS(romfs); - return; - } + if (!extracted_romfs) { + LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} failed to extract!", + font_title_id); + continue; + } - ExecuteInternal(); -} + const auto font_file = extracted_romfs->GetFile(font.second); -void WebBrowser::UnpackRomFS() { - if (unpacked) - return; + if (!font_file) { + LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} has no font file \"{}\"!", + font_title_id, font.second); + continue; + } - ASSERT(offline_romfs != nullptr); - const auto dir = - FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard); - const auto& vfs{system.GetFilesystem()}; - const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite); - FileSys::VfsRawCopyD(dir, temp_dir); + std::vector<u32> font_data_u32(font_file->GetSize() / sizeof(u32)); + font_file->ReadBytes<u32>(font_data_u32.data(), font_file->GetSize()); - unpacked = true; -} + std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(), + Common::swap32); -void WebBrowser::Finalize() { - complete = true; + std::vector<u8> decrypted_data(font_file->GetSize() - 8); - WebCommonReturnValue out{}; - out.result_code = 0; - out.last_url_size = 0; + NS::DecryptSharedFontToTTF(font_data_u32, decrypted_data); - std::vector<u8> data(sizeof(WebCommonReturnValue)); - std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue)); + FileSys::VirtualFile decrypted_font = std::make_shared<FileSys::VectorVfsFile>( + std::move(decrypted_data), DECRYPTED_SHARED_FONTS[i]); - broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(data))); - broker.SignalStateChanged(); + const auto temp_dir = + system.GetFilesystem()->CreateDirectory(fonts_dir, FileSys::Mode::ReadWrite); + + const auto out_file = 
temp_dir->CreateFile(DECRYPTED_SHARED_FONTS[i]); - if (!temporary_dir.empty() && Common::FS::IsDirectory(temporary_dir)) { - Common::FS::DeleteDirRecursively(temporary_dir); + FileSys::VfsRawCopy(decrypted_font, out_file); } } -void WebBrowser::InitializeInternal() { - using WebAppletInitializer = void (WebBrowser::*)(); +} // namespace - constexpr std::array<WebAppletInitializer, SHIM_KIND_COUNT> functions{ - nullptr, &WebBrowser::InitializeShop, - nullptr, &WebBrowser::InitializeOffline, - nullptr, nullptr, - nullptr, nullptr, - }; +WebBrowser::WebBrowser(Core::System& system_, const Core::Frontend::WebBrowserApplet& frontend_) + : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {} - const auto index = static_cast<u32>(kind); +WebBrowser::~WebBrowser() = default; - if (index > functions.size() || functions[index] == nullptr) { - LOG_ERROR(Service_AM, "Invalid shim_kind={:08X}", index); - return; - } +void WebBrowser::Initialize() { + Applet::Initialize(); - const auto function = functions[index]; - (this->*function)(); -} + LOG_INFO(Service_AM, "Initializing Web Browser Applet."); -void WebBrowser::ExecuteInternal() { - using WebAppletExecutor = void (WebBrowser::*)(); + LOG_DEBUG(Service_AM, + "Initializing Applet with common_args: arg_version={}, lib_version={}, " + "play_startup_sound={}, size={}, system_tick={}, theme_color={}", + common_args.arguments_version, common_args.library_version, + common_args.play_startup_sound, common_args.size, common_args.system_tick, + common_args.theme_color); - constexpr std::array<WebAppletExecutor, SHIM_KIND_COUNT> functions{ - nullptr, &WebBrowser::ExecuteShop, - nullptr, &WebBrowser::ExecuteOffline, - nullptr, nullptr, - nullptr, nullptr, - }; + web_applet_version = WebAppletVersion{common_args.library_version}; - const auto index = static_cast<u32>(kind); + const auto web_arg_storage = broker.PopNormalDataToApplet(); + ASSERT(web_arg_storage != nullptr); - if (index > functions.size() || functions[index] 
== nullptr) { - LOG_ERROR(Service_AM, "Invalid shim_kind={:08X}", index); - return; - } + const auto& web_arg = web_arg_storage->GetData(); + ASSERT_OR_EXECUTE(web_arg.size() >= sizeof(WebArgHeader), { return; }); - const auto function = functions[index]; - (this->*function)(); -} + web_arg_input_tlv_map = ReadWebArgs(web_arg, web_arg_header); -void WebBrowser::InitializeShop() { - if (frontend_e_commerce == nullptr) { - LOG_ERROR(Service_AM, "Missing ECommerce Applet frontend!"); - status = RESULT_UNKNOWN; - return; - } + LOG_DEBUG(Service_AM, "WebArgHeader: total_tlv_entries={}, shim_kind={}", + web_arg_header.total_tlv_entries, web_arg_header.shim_kind); - const auto user_id_data = args.find(WebArgTLVType::UserID); + ExtractSharedFonts(system); - user_id = std::nullopt; - if (user_id_data != args.end()) { - user_id = u128{}; - std::memcpy(user_id->data(), user_id_data->second.data(), sizeof(u128)); + switch (web_arg_header.shim_kind) { + case ShimKind::Shop: + InitializeShop(); + break; + case ShimKind::Login: + InitializeLogin(); + break; + case ShimKind::Offline: + InitializeOffline(); + break; + case ShimKind::Share: + InitializeShare(); + break; + case ShimKind::Web: + InitializeWeb(); + break; + case ShimKind::Wifi: + InitializeWifi(); + break; + case ShimKind::Lobby: + InitializeLobby(); + break; + default: + UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind); + break; } +} - const auto url = args.find(WebArgTLVType::ShopArgumentsURL); +bool WebBrowser::TransactionComplete() const { + return complete; +} - if (url == args.end()) { - LOG_ERROR(Service_AM, "Missing EShop Arguments URL for initialization!"); - status = RESULT_UNKNOWN; - return; - } +ResultCode WebBrowser::GetStatus() const { + return status; +} - std::vector<std::string> split_query; - Common::SplitString(Common::StringFromFixedZeroTerminatedBuffer( - reinterpret_cast<const char*>(url->second.data()), url->second.size()), - '?', split_query); - - // 2 -> Main URL '?' 
Query Parameters - // Less is missing info, More is malformed - if (split_query.size() != 2) { - LOG_ERROR(Service_AM, "EShop Arguments has more than one question mark, malformed"); - status = RESULT_UNKNOWN; - return; - } +void WebBrowser::ExecuteInteractive() { + UNIMPLEMENTED_MSG("WebSession is not implemented"); +} - std::vector<std::string> queries; - Common::SplitString(split_query[1], '&', queries); +void WebBrowser::Execute() { + switch (web_arg_header.shim_kind) { + case ShimKind::Shop: + ExecuteShop(); + break; + case ShimKind::Login: + ExecuteLogin(); + break; + case ShimKind::Offline: + ExecuteOffline(); + break; + case ShimKind::Share: + ExecuteShare(); + break; + case ShimKind::Web: + ExecuteWeb(); + break; + case ShimKind::Wifi: + ExecuteWifi(); + break; + case ShimKind::Lobby: + ExecuteLobby(); + break; + default: + UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind); + WebBrowserExit(WebExitReason::EndButtonPressed); + break; + } +} - const auto split_single_query = - [](const std::string& in) -> std::pair<std::string, std::string> { - const auto index = in.find('='); - if (index == std::string::npos || index == in.size() - 1) { - return {in, ""}; - } +void WebBrowser::ExtractOfflineRomFS() { + LOG_DEBUG(Service_AM, "Extracting RomFS to {}", offline_cache_dir); - return {in.substr(0, index), in.substr(index + 1)}; - }; + const auto extracted_romfs_dir = + FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard); - std::transform(queries.begin(), queries.end(), - std::inserter(shop_query, std::next(shop_query.begin())), split_single_query); + const auto temp_dir = + system.GetFilesystem()->CreateDirectory(offline_cache_dir, FileSys::Mode::ReadWrite); - const auto scene = shop_query.find("scene"); + FileSys::VfsRawCopyD(extracted_romfs_dir, temp_dir); +} - if (scene == shop_query.end()) { - LOG_ERROR(Service_AM, "No scene parameter was passed via shop query!"); - status = RESULT_UNKNOWN; - return; +void 
WebBrowser::WebBrowserExit(WebExitReason exit_reason, std::string last_url) { + if ((web_arg_header.shim_kind == ShimKind::Share && + web_applet_version >= WebAppletVersion::Version196608) || + (web_arg_header.shim_kind == ShimKind::Web && + web_applet_version >= WebAppletVersion::Version524288)) { + // TODO: Push Output TLVs instead of a WebCommonReturnValue } - const std::map<std::string, ShopWebTarget, std::less<>> target_map{ - {"product_detail", ShopWebTarget::ApplicationInfo}, - {"aocs", ShopWebTarget::AddOnContentList}, - {"subscriptions", ShopWebTarget::SubscriptionList}, - {"consumption", ShopWebTarget::ConsumableItemList}, - {"settings", ShopWebTarget::Settings}, - {"top", ShopWebTarget::Home}, - }; + WebCommonReturnValue web_common_return_value; - const auto target = target_map.find(scene->second); - if (target == target_map.end()) { - LOG_ERROR(Service_AM, "Scene for shop query is invalid! (scene={})", scene->second); - status = RESULT_UNKNOWN; - return; - } + web_common_return_value.exit_reason = exit_reason; + std::memcpy(&web_common_return_value.last_url, last_url.data(), last_url.size()); + web_common_return_value.last_url_size = last_url.size(); - shop_web_target = target->second; + LOG_DEBUG(Service_AM, "WebCommonReturnValue: exit_reason={}, last_url={}, last_url_size={}", + exit_reason, last_url, last_url.size()); - const auto title_id_data = shop_query.find("dst_app_id"); - if (title_id_data != shop_query.end()) { - title_id = std::stoull(title_id_data->second, nullptr, 0x10); - } + complete = true; + std::vector<u8> out_data(sizeof(WebCommonReturnValue)); + std::memcpy(out_data.data(), &web_common_return_value, out_data.size()); + broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data))); + broker.SignalStateChanged(); +} - const auto mode_data = shop_query.find("mode"); - if (mode_data != shop_query.end()) { - shop_full_display = mode_data->second == "full"; - } +bool 
WebBrowser::InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const { + return web_arg_input_tlv_map.find(input_tlv_type) != web_arg_input_tlv_map.end(); } -void WebBrowser::InitializeOffline() { - if (args.find(WebArgTLVType::DocumentPath) == args.end() || - args.find(WebArgTLVType::DocumentKind) == args.end() || - args.find(WebArgTLVType::ApplicationID) == args.end()) { - status = RESULT_UNKNOWN; - LOG_ERROR(Service_AM, "Missing necessary parameters for initialization!"); +std::optional<std::vector<u8>> WebBrowser::GetInputTLVData(WebArgInputTLVType input_tlv_type) { + const auto map_it = web_arg_input_tlv_map.find(input_tlv_type); + + if (map_it == web_arg_input_tlv_map.end()) { + return std::nullopt; } - const auto url_data = args[WebArgTLVType::DocumentPath]; - filename = Common::StringFromFixedZeroTerminatedBuffer( - reinterpret_cast<const char*>(url_data.data()), url_data.size()); + return map_it->second; +} - OfflineWebSource source; - ASSERT(args[WebArgTLVType::DocumentKind].size() >= 4); - std::memcpy(&source, args[WebArgTLVType::DocumentKind].data(), sizeof(OfflineWebSource)); +void WebBrowser::InitializeShop() {} - constexpr std::array<const char*, 3> WEB_SOURCE_NAMES{ - "manual", - "legal", - "system", - }; +void WebBrowser::InitializeLogin() {} + +void WebBrowser::InitializeOffline() { + const auto document_path = + ParseStringValue(GetInputTLVData(WebArgInputTLVType::DocumentPath).value()); + + const auto document_kind = + ParseRawValue<DocumentKind>(GetInputTLVData(WebArgInputTLVType::DocumentKind).value()); + + std::string additional_paths; - temporary_dir = - Common::FS::SanitizePath(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir) + - "web_applet_" + WEB_SOURCE_NAMES[static_cast<u32>(source) - 1], - Common::FS::DirectorySeparator::PlatformDefault); - Common::FS::DeleteDirRecursively(temporary_dir); - - u64 title_id = 0; // 0 corresponds to current process - ASSERT(args[WebArgTLVType::ApplicationID].size() >= 0x8); - 
std::memcpy(&title_id, args[WebArgTLVType::ApplicationID].data(), sizeof(u64)); - FileSys::ContentRecordType type = FileSys::ContentRecordType::Data; - - switch (source) { - case OfflineWebSource::OfflineHtmlPage: - // While there is an AppID TLV field, in official SW this is always ignored. - title_id = 0; - type = FileSys::ContentRecordType::HtmlDocument; + switch (document_kind) { + case DocumentKind::OfflineHtmlPage: + default: + title_id = system.CurrentProcess()->GetTitleID(); + nca_type = FileSys::ContentRecordType::HtmlDocument; + additional_paths = "html-document"; break; - case OfflineWebSource::ApplicationLegalInformation: - type = FileSys::ContentRecordType::LegalInformation; + case DocumentKind::ApplicationLegalInformation: + title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::ApplicationID).value()); + nca_type = FileSys::ContentRecordType::LegalInformation; break; - case OfflineWebSource::SystemDataPage: - type = FileSys::ContentRecordType::Data; + case DocumentKind::SystemDataPage: + title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::SystemDataID).value()); + nca_type = FileSys::ContentRecordType::Data; break; } - if (title_id == 0) { - title_id = system.CurrentProcess()->GetTitleID(); - } + static constexpr std::array<const char*, 3> RESOURCE_TYPES{ + "manual", + "legal_information", + "system_data", + }; - offline_romfs = GetApplicationRomFS(system, title_id, type); - if (offline_romfs == nullptr) { - status = RESULT_UNKNOWN; - LOG_ERROR(Service_AM, "Failed to find offline data for request!"); - } + offline_cache_dir = Common::FS::SanitizePath( + fmt::format("{}/offline_web_applet_{}/{:016X}", + Common::FS::GetUserPath(Common::FS::UserPath::CacheDir), + RESOURCE_TYPES[static_cast<u32>(document_kind) - 1], title_id), + Common::FS::DirectorySeparator::PlatformDefault); - std::string path_additional_directory; - if (source == OfflineWebSource::OfflineHtmlPage) { - path_additional_directory = 
std::string(DIR_SEP).append("html-document"); - } + offline_document = Common::FS::SanitizePath( + fmt::format("{}/{}/{}", offline_cache_dir, additional_paths, document_path), + Common::FS::DirectorySeparator::PlatformDefault); +} + +void WebBrowser::InitializeShare() {} - filename = - Common::FS::SanitizePath(temporary_dir + path_additional_directory + DIR_SEP + filename, - Common::FS::DirectorySeparator::PlatformDefault); +void WebBrowser::InitializeWeb() { + external_url = ParseStringValue(GetInputTLVData(WebArgInputTLVType::InitialURL).value()); } +void WebBrowser::InitializeWifi() {} + +void WebBrowser::InitializeLobby() {} + void WebBrowser::ExecuteShop() { - const auto callback = [this]() { Finalize(); }; + LOG_WARNING(Service_AM, "(STUBBED) called, Shop Applet is not implemented"); + WebBrowserExit(WebExitReason::EndButtonPressed); +} - const auto check_optional_parameter = [this](const auto& p) { - if (!p.has_value()) { - LOG_ERROR(Service_AM, "Missing one or more necessary parameters for execution!"); - status = RESULT_UNKNOWN; - return false; - } +void WebBrowser::ExecuteLogin() { + LOG_WARNING(Service_AM, "(STUBBED) called, Login Applet is not implemented"); + WebBrowserExit(WebExitReason::EndButtonPressed); +} - return true; - }; +void WebBrowser::ExecuteOffline() { + const auto main_url = Common::FS::SanitizePath(GetMainURL(offline_document), + Common::FS::DirectorySeparator::PlatformDefault); - switch (shop_web_target) { - case ShopWebTarget::ApplicationInfo: - if (!check_optional_parameter(title_id)) - return; - frontend_e_commerce->ShowApplicationInformation(callback, *title_id, user_id, - shop_full_display, shop_extra_parameter); - break; - case ShopWebTarget::AddOnContentList: - if (!check_optional_parameter(title_id)) - return; - frontend_e_commerce->ShowAddOnContentList(callback, *title_id, user_id, shop_full_display); - break; - case ShopWebTarget::ConsumableItemList: - if (!check_optional_parameter(title_id)) - return; - 
frontend_e_commerce->ShowConsumableItemList(callback, *title_id, user_id); - break; - case ShopWebTarget::Home: - if (!check_optional_parameter(user_id)) - return; - if (!check_optional_parameter(shop_full_display)) - return; - frontend_e_commerce->ShowShopHome(callback, *user_id, *shop_full_display); - break; - case ShopWebTarget::Settings: - if (!check_optional_parameter(user_id)) - return; - if (!check_optional_parameter(shop_full_display)) - return; - frontend_e_commerce->ShowSettings(callback, *user_id, *shop_full_display); - break; - case ShopWebTarget::SubscriptionList: - if (!check_optional_parameter(title_id)) + if (!Common::FS::Exists(main_url)) { + offline_romfs = GetOfflineRomFS(system, title_id, nca_type); + + if (offline_romfs == nullptr) { + LOG_ERROR(Service_AM, + "RomFS with title_id={:016X} and nca_type={} cannot be extracted!", title_id, + nca_type); + WebBrowserExit(WebExitReason::WindowClosed); return; - frontend_e_commerce->ShowSubscriptionList(callback, *title_id, user_id); - break; - default: - UNREACHABLE(); + } } + + LOG_INFO(Service_AM, "Opening offline document at {}", offline_document); + + frontend.OpenLocalWebPage( + offline_document, [this] { ExtractOfflineRomFS(); }, + [this](WebExitReason exit_reason, std::string last_url) { + WebBrowserExit(exit_reason, last_url); + }); } -void WebBrowser::ExecuteOffline() { - frontend.OpenPageLocal( - filename, [this] { UnpackRomFS(); }, [this] { Finalize(); }); +void WebBrowser::ExecuteShare() { + LOG_WARNING(Service_AM, "(STUBBED) called, Share Applet is not implemented"); + WebBrowserExit(WebExitReason::EndButtonPressed); +} + +void WebBrowser::ExecuteWeb() { + LOG_INFO(Service_AM, "Opening external URL at {}", external_url); + + frontend.OpenExternalWebPage(external_url, + [this](WebExitReason exit_reason, std::string last_url) { + WebBrowserExit(exit_reason, last_url); + }); } +void WebBrowser::ExecuteWifi() { + LOG_WARNING(Service_AM, "(STUBBED) called, Wifi Applet is not implemented"); + 
WebBrowserExit(WebExitReason::EndButtonPressed); +} + +void WebBrowser::ExecuteLobby() { + LOG_WARNING(Service_AM, "(STUBBED) called, Lobby Applet is not implemented"); + WebBrowserExit(WebExitReason::EndButtonPressed); +} } // namespace Service::AM::Applets diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h index 8d4027411..04c274754 100644 --- a/src/core/hle/service/am/applets/web_browser.h +++ b/src/core/hle/service/am/applets/web_browser.h @@ -1,28 +1,31 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once -#include <map> +#include <optional> + +#include "common/common_funcs.h" +#include "common/common_types.h" #include "core/file_sys/vfs_types.h" -#include "core/hle/service/am/am.h" +#include "core/hle/result.h" #include "core/hle/service/am/applets/applets.h" +#include "core/hle/service/am/applets/web_types.h" namespace Core { class System; } -namespace Service::AM::Applets { +namespace FileSys { +enum class ContentRecordType : u8; +} -enum class ShimKind : u32; -enum class ShopWebTarget; -enum class WebArgTLVType : u16; +namespace Service::AM::Applets { class WebBrowser final : public Applet { public: - WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_, - Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr); + WebBrowser(Core::System& system_, const Core::Frontend::WebBrowserApplet& frontend_); ~WebBrowser() override; @@ -33,49 +36,50 @@ public: void ExecuteInteractive() override; void Execute() override; - // Callback to be fired when the frontend needs the manual RomFS unpacked to temporary - // directory. This is a blocking call and may take a while as some manuals can be up to 100MB in - // size. Attempting to access files at filename before invocation is likely to not work. 
- void UnpackRomFS(); + void ExtractOfflineRomFS(); - // Callback to be fired when the frontend is finished browsing. This will delete the temporary - // manual RomFS extracted files, so ensure this is only called at actual finalization. - void Finalize(); + void WebBrowserExit(WebExitReason exit_reason, std::string last_url = ""); private: - void InitializeInternal(); - void ExecuteInternal(); + bool InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const; - // Specific initializers for the types of web applets + std::optional<std::vector<u8>> GetInputTLVData(WebArgInputTLVType input_tlv_type); + + // Initializers for the various types of browser applets void InitializeShop(); + void InitializeLogin(); void InitializeOffline(); + void InitializeShare(); + void InitializeWeb(); + void InitializeWifi(); + void InitializeLobby(); - // Specific executors for the types of web applets + // Executors for the various types of browser applets void ExecuteShop(); + void ExecuteLogin(); void ExecuteOffline(); + void ExecuteShare(); + void ExecuteWeb(); + void ExecuteWifi(); + void ExecuteLobby(); - Core::Frontend::WebBrowserApplet& frontend; - - // Extra frontends for specialized functions - Core::Frontend::ECommerceApplet* frontend_e_commerce; + const Core::Frontend::WebBrowserApplet& frontend; - bool complete = false; - bool unpacked = false; - ResultCode status = RESULT_SUCCESS; + bool complete{false}; + ResultCode status{RESULT_SUCCESS}; - ShimKind kind; - std::map<WebArgTLVType, std::vector<u8>> args; + WebAppletVersion web_applet_version; + WebExitReason web_exit_reason; + WebArgHeader web_arg_header; + WebArgInputTLVMap web_arg_input_tlv_map; + u64 title_id; + FileSys::ContentRecordType nca_type; + std::string offline_cache_dir; + std::string offline_document; FileSys::VirtualFile offline_romfs; - std::string temporary_dir; - std::string filename; - - ShopWebTarget shop_web_target; - std::map<std::string, std::string, std::less<>> shop_query; - std::optional<u64> 
title_id = 0; - std::optional<u128> user_id; - std::optional<bool> shop_full_display; - std::string shop_extra_parameter; + + std::string external_url; Core::System& system; }; diff --git a/src/core/hle/service/am/applets/web_types.h b/src/core/hle/service/am/applets/web_types.h new file mode 100644 index 000000000..419c2bf79 --- /dev/null +++ b/src/core/hle/service/am/applets/web_types.h @@ -0,0 +1,178 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <unordered_map> +#include <vector> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/swap.h" + +namespace Service::AM::Applets { + +enum class WebAppletVersion : u32_le { + Version0 = 0x0, // Only used by WifiWebAuthApplet + Version131072 = 0x20000, // 1.0.0 - 2.3.0 + Version196608 = 0x30000, // 3.0.0 - 4.1.0 + Version327680 = 0x50000, // 5.0.0 - 5.1.0 + Version393216 = 0x60000, // 6.0.0 - 7.0.1 + Version524288 = 0x80000, // 8.0.0+ +}; + +enum class ShimKind : u32 { + Shop = 1, + Login = 2, + Offline = 3, + Share = 4, + Web = 5, + Wifi = 6, + Lobby = 7, +}; + +enum class WebExitReason : u32 { + EndButtonPressed = 0, + BackButtonPressed = 1, + ExitRequested = 2, + CallbackURL = 3, + WindowClosed = 4, + ErrorDialog = 7, +}; + +enum class WebArgInputTLVType : u16 { + InitialURL = 0x1, + CallbackURL = 0x3, + CallbackableURL = 0x4, + ApplicationID = 0x5, + DocumentPath = 0x6, + DocumentKind = 0x7, + SystemDataID = 0x8, + ShareStartPage = 0x9, + Whitelist = 0xA, + News = 0xB, + UserID = 0xE, + AlbumEntry0 = 0xF, + ScreenShotEnabled = 0x10, + EcClientCertEnabled = 0x11, + PlayReportEnabled = 0x13, + BootDisplayKind = 0x17, + BackgroundKind = 0x18, + FooterEnabled = 0x19, + PointerEnabled = 0x1A, + LeftStickMode = 0x1B, + KeyRepeatFrame1 = 0x1C, + KeyRepeatFrame2 = 0x1D, + BootAsMediaPlayerInverted = 0x1E, + DisplayURLKind = 0x1F, + BootAsMediaPlayer = 0x21, + 
ShopJumpEnabled = 0x22, + MediaAutoPlayEnabled = 0x23, + LobbyParameter = 0x24, + ApplicationAlbumEntry = 0x26, + JsExtensionEnabled = 0x27, + AdditionalCommentText = 0x28, + TouchEnabledOnContents = 0x29, + UserAgentAdditionalString = 0x2A, + AdditionalMediaData0 = 0x2B, + MediaPlayerAutoCloseEnabled = 0x2C, + PageCacheEnabled = 0x2D, + WebAudioEnabled = 0x2E, + YouTubeVideoWhitelist = 0x31, + FooterFixedKind = 0x32, + PageFadeEnabled = 0x33, + MediaCreatorApplicationRatingAge = 0x34, + BootLoadingIconEnabled = 0x35, + PageScrollIndicatorEnabled = 0x36, + MediaPlayerSpeedControlEnabled = 0x37, + AlbumEntry1 = 0x38, + AlbumEntry2 = 0x39, + AlbumEntry3 = 0x3A, + AdditionalMediaData1 = 0x3B, + AdditionalMediaData2 = 0x3C, + AdditionalMediaData3 = 0x3D, + BootFooterButton = 0x3E, + OverrideWebAudioVolume = 0x3F, + OverrideMediaAudioVolume = 0x40, + BootMode = 0x41, + WebSessionEnabled = 0x42, + MediaPlayerOfflineEnabled = 0x43, +}; + +enum class WebArgOutputTLVType : u16 { + ShareExitReason = 0x1, + LastURL = 0x2, + LastURLSize = 0x3, + SharePostResult = 0x4, + PostServiceName = 0x5, + PostServiceNameSize = 0x6, + PostID = 0x7, + PostIDSize = 0x8, + MediaPlayerAutoClosedByCompletion = 0x9, +}; + +enum class DocumentKind : u32 { + OfflineHtmlPage = 1, + ApplicationLegalInformation = 2, + SystemDataPage = 3, +}; + +enum class ShareStartPage : u32 { + Default, + Settings, +}; + +enum class BootDisplayKind : u32 { + Default, + White, + Black, +}; + +enum class BackgroundKind : u32 { + Default, +}; + +enum class LeftStickMode : u32 { + Pointer, + Cursor, +}; + +enum class WebSessionBootMode : u32 { + AllForeground, + AllForegroundInitiallyHidden, +}; + +struct WebArgHeader { + u16 total_tlv_entries{}; + INSERT_PADDING_BYTES(2); + ShimKind shim_kind{}; +}; +static_assert(sizeof(WebArgHeader) == 0x8, "WebArgHeader has incorrect size."); + +struct WebArgInputTLV { + WebArgInputTLVType input_tlv_type{}; + u16 arg_data_size{}; + INSERT_PADDING_WORDS(1); +}; 
+static_assert(sizeof(WebArgInputTLV) == 0x8, "WebArgInputTLV has incorrect size."); + +struct WebArgOutputTLV { + WebArgOutputTLVType output_tlv_type{}; + u16 arg_data_size{}; + INSERT_PADDING_WORDS(1); +}; +static_assert(sizeof(WebArgOutputTLV) == 0x8, "WebArgOutputTLV has incorrect size."); + +struct WebCommonReturnValue { + WebExitReason exit_reason{}; + INSERT_PADDING_WORDS(1); + std::array<char, 0x1000> last_url{}; + u64 last_url_size{}; +}; +static_assert(sizeof(WebCommonReturnValue) == 0x1010, "WebCommonReturnValue has incorrect size."); + +using WebArgInputTLVMap = std::unordered_map<WebArgInputTLVType, std::vector<u8>>; + +} // namespace Service::AM::Applets diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 6abac3f78..23e28565b 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -7,6 +7,7 @@ #include <vector> #include "common/logging/log.h" #include "core/core.h" +#include "core/file_sys/common_funcs.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/nca_metadata.h" @@ -23,11 +24,8 @@ namespace Service::AOC { -constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; -constexpr u64 DLC_BASE_TO_AOC_ID = 0x1000; - static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) { - return (title_id & DLC_BASE_TITLE_ID_MASK) == base; + return FileSys::GetBaseTitleID(title_id) == base; } static std::vector<u64> AccumulateAOCTitleIDs(Core::System& system) { @@ -48,6 +46,62 @@ static std::vector<u64> AccumulateAOCTitleIDs(Core::System& system) { return add_on_content; } +class IPurchaseEventManager final : public ServiceFramework<IPurchaseEventManager> { +public: + explicit IPurchaseEventManager(Core::System& system_) + : ServiceFramework{system_, "IPurchaseEventManager"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, &IPurchaseEventManager::SetDefaultDeliveryTarget, 
"SetDefaultDeliveryTarget"}, + {1, &IPurchaseEventManager::SetDeliveryTarget, "SetDeliveryTarget"}, + {2, &IPurchaseEventManager::GetPurchasedEventReadableHandle, "GetPurchasedEventReadableHandle"}, + {3, nullptr, "PopPurchasedProductInfo"}, + {4, nullptr, "PopPurchasedProductInfoWithUid"}, + }; + // clang-format on + + RegisterHandlers(functions); + + purchased_event = Kernel::WritableEvent::CreateEventPair( + system.Kernel(), "IPurchaseEventManager:PurchasedEvent"); + } + +private: + void SetDefaultDeliveryTarget(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const auto unknown_1 = rp.Pop<u64>(); + [[maybe_unused]] const auto unknown_2 = ctx.ReadBuffer(); + + LOG_WARNING(Service_AOC, "(STUBBED) called, unknown_1={}", unknown_1); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void SetDeliveryTarget(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const auto unknown_1 = rp.Pop<u64>(); + [[maybe_unused]] const auto unknown_2 = ctx.ReadBuffer(); + + LOG_WARNING(Service_AOC, "(STUBBED) called, unknown_1={}", unknown_1); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void GetPurchasedEventReadableHandle(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushCopyObjects(purchased_event.readable); + } + + Kernel::EventPair purchased_event; +}; + AOC_U::AOC_U(Core::System& system_) : ServiceFramework{system_, "aoc:u"}, add_on_content{AccumulateAOCTitleIDs(system)} { // clang-format off @@ -62,8 +116,8 @@ AOC_U::AOC_U(Core::System& system_) {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, {9, nullptr, "GetAddOnContentLostErrorCode"}, - {100, nullptr, "CreateEcPurchasedEventManager"}, - {101, nullptr, "CreatePermanentEcPurchasedEventManager"}, + {100, &AOC_U::CreateEcPurchasedEventManager, 
"CreateEcPurchasedEventManager"}, + {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"}, }; // clang-format on @@ -123,11 +177,11 @@ void AOC_U::ListAddOnContent(Kernel::HLERequestContext& ctx) { const auto& disabled = Settings::values.disabled_addons[current]; if (std::find(disabled.begin(), disabled.end(), "DLC") == disabled.end()) { for (u64 content_id : add_on_content) { - if ((content_id & DLC_BASE_TITLE_ID_MASK) != current) { + if (FileSys::GetBaseTitleID(content_id) != current) { continue; } - out.push_back(static_cast<u32>(content_id & 0x7FF)); + out.push_back(static_cast<u32>(FileSys::GetAOCID(content_id))); } } @@ -169,7 +223,7 @@ void AOC_U::GetAddOnContentBaseId(Kernel::HLERequestContext& ctx) { const auto res = pm.GetControlMetadata(); if (res.first == nullptr) { - rb.Push(title_id + DLC_BASE_TO_AOC_ID); + rb.Push(FileSys::GetAOCBaseTitleID(title_id)); return; } @@ -201,6 +255,22 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) { rb.PushCopyObjects(aoc_change_event.readable); } +void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IPurchaseEventManager>(system); +} + +void AOC_U::CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IPurchaseEventManager>(system); +} + void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { std::make_shared<AOC_U>(system)->InstallAsService(service_manager); } diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h index 7628f4568..26ee51be0 100644 --- a/src/core/hle/service/aoc/aoc_u.h +++ b/src/core/hle/service/aoc/aoc_u.h @@ -27,6 +27,8 @@ private: void 
GetAddOnContentBaseId(Kernel::HLERequestContext& ctx); void PrepareAddOnContent(Kernel::HLERequestContext& ctx); void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx); + void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx); + void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx); std::vector<u64> add_on_content; Kernel::EventPair aoc_change_event; diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp index ce993bad3..03636642b 100644 --- a/src/core/hle/service/apm/controller.cpp +++ b/src/core/hle/service/apm/controller.cpp @@ -48,8 +48,7 @@ void Controller::SetPerformanceConfiguration(PerformanceMode mode, [config](const auto& entry) { return entry.first == config; }); if (iter == config_to_speed.cend()) { - LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", - static_cast<u32>(config)); + LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", config); return; } diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp index 89442e21e..298f6d520 100644 --- a/src/core/hle/service/apm/interface.cpp +++ b/src/core/hle/service/apm/interface.cpp @@ -28,8 +28,7 @@ private: const auto mode = rp.PopEnum<PerformanceMode>(); const auto config = rp.PopEnum<PerformanceConfiguration>(); - LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode), - static_cast<u32>(config)); + LOG_DEBUG(Service_APM, "called mode={} config={}", mode, config); controller.SetPerformanceConfiguration(mode, config); @@ -41,7 +40,7 @@ private: IPC::RequestParser rp{ctx}; const auto mode = rp.PopEnum<PerformanceMode>(); - LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode)); + LOG_DEBUG(Service_APM, "called mode={}", mode); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); @@ -111,7 +110,7 @@ void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const 
auto mode = rp.PopEnum<CpuBoostMode>(); - LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode)); + LOG_DEBUG(Service_APM, "called, mode={:08X}", mode); controller.SetFromCpuBoostMode(mode); diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 145f47ee2..0cd797109 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -70,8 +70,10 @@ public: Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased"); stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, - audio_params.channel_count, std::move(unique_name), - [this] { buffer_event.writable->Signal(); }); + audio_params.channel_count, std::move(unique_name), [this] { + const auto guard = LockService(); + buffer_event.writable->Signal(); + }); } private: diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 6e7b7316c..c5c22d053 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -49,16 +49,16 @@ public: system_event = Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent"); - renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(), - audren_params, system_event.writable, - instance_number); + renderer = std::make_unique<AudioCore::AudioRenderer>( + system.CoreTiming(), system.Memory(), audren_params, + [this]() { + const auto guard = LockService(); + system_event.writable->Signal(); + }, + instance_number); } private: - void UpdateAudioCallback() { - system_event.writable->Signal(); - } - void GetSampleRate(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 3b6f7498e..e43f3f47f 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ 
b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -483,7 +483,7 @@ Boxcat::StatusResult Boxcat::GetStatus(std::optional<std::string>& global, global = json["global"].get<std::string>(); if (json["games"].is_array()) { - for (const auto object : json["games"]) { + for (const auto& object : json["games"]) { if (object.is_object() && object.find("name") != object.end()) { EventStatus detail{}; if (object["header"].is_string()) { diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp index 9b7672a91..13147472e 100644 --- a/src/core/hle/service/fatal/fatal.cpp +++ b/src/core/hle/service/fatal/fatal.cpp @@ -111,8 +111,9 @@ static void GenerateErrorReport(Core::System& system, ResultCode error_code, static void ThrowFatalError(Core::System& system, ResultCode error_code, FatalType fatal_type, const FatalInfo& info) { - LOG_ERROR(Service_Fatal, "Threw fatal error type {} with error code 0x{:X}", - static_cast<u32>(fatal_type), error_code.raw); + LOG_ERROR(Service_Fatal, "Threw fatal error type {} with error code 0x{:X}", fatal_type, + error_code.raw); + switch (fatal_type) { case FatalType::ErrorReportAndScreen: GenerateErrorReport(system, error_code, info); diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index ca93062cf..b15c737e1 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -298,10 +298,35 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFSCurrentProcess() return romfs_factory->OpenCurrentProcess(system.CurrentProcess()->GetTitleID()); } +ResultVal<FileSys::VirtualFile> FileSystemController::OpenPatchedRomFS( + u64 title_id, FileSys::ContentRecordType type) const { + LOG_TRACE(Service_FS, "Opening patched RomFS for title_id={:016X}", title_id); + + if (romfs_factory == nullptr) { + // TODO: Find a better error code for this + return RESULT_UNKNOWN; + } + + return 
romfs_factory->OpenPatchedRomFS(title_id, type); +} + +ResultVal<FileSys::VirtualFile> FileSystemController::OpenPatchedRomFSWithProgramIndex( + u64 title_id, u8 program_index, FileSys::ContentRecordType type) const { + LOG_TRACE(Service_FS, "Opening patched RomFS for title_id={:016X}, program_index={}", title_id, + program_index); + + if (romfs_factory == nullptr) { + // TODO: Find a better error code for this + return RESULT_UNKNOWN; + } + + return romfs_factory->OpenPatchedRomFSWithProgramIndex(title_id, program_index, type); +} + ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFS( u64 title_id, FileSys::StorageId storage_id, FileSys::ContentRecordType type) const { LOG_TRACE(Service_FS, "Opening RomFS for title_id={:016X}, storage_id={:02X}, type={:02X}", - title_id, static_cast<u8>(storage_id), static_cast<u8>(type)); + title_id, storage_id, type); if (romfs_factory == nullptr) { // TODO(bunnei): Find a better error code for this @@ -313,8 +338,8 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFS( ResultVal<FileSys::VirtualDir> FileSystemController::CreateSaveData( FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& save_struct) const { - LOG_TRACE(Service_FS, "Creating Save Data for space_id={:01X}, save_struct={}", - static_cast<u8>(space), save_struct.DebugInfo()); + LOG_TRACE(Service_FS, "Creating Save Data for space_id={:01X}, save_struct={}", space, + save_struct.DebugInfo()); if (save_data_factory == nullptr) { return FileSys::ERROR_ENTITY_NOT_FOUND; @@ -325,8 +350,8 @@ ResultVal<FileSys::VirtualDir> FileSystemController::CreateSaveData( ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveData( FileSys::SaveDataSpaceId space, const FileSys::SaveDataAttribute& attribute) const { - LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}", - static_cast<u8>(space), attribute.DebugInfo()); + LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}", space, + 
attribute.DebugInfo()); if (save_data_factory == nullptr) { return FileSys::ERROR_ENTITY_NOT_FOUND; @@ -337,7 +362,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveData( ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveDataSpace( FileSys::SaveDataSpaceId space) const { - LOG_TRACE(Service_FS, "Opening Save Data Space for space_id={:01X}", static_cast<u8>(space)); + LOG_TRACE(Service_FS, "Opening Save Data Space for space_id={:01X}", space); if (save_data_factory == nullptr) { return FileSys::ERROR_ENTITY_NOT_FOUND; @@ -358,7 +383,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenSDMC() const { ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition( FileSys::BisPartitionId id) const { - LOG_TRACE(Service_FS, "Opening BIS Partition with id={:08X}", static_cast<u32>(id)); + LOG_TRACE(Service_FS, "Opening BIS Partition with id={:08X}", id); if (bis_factory == nullptr) { return FileSys::ERROR_ENTITY_NOT_FOUND; @@ -374,7 +399,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition( ResultVal<FileSys::VirtualFile> FileSystemController::OpenBISPartitionStorage( FileSys::BisPartitionId id) const { - LOG_TRACE(Service_FS, "Opening BIS Partition Storage with id={:08X}", static_cast<u32>(id)); + LOG_TRACE(Service_FS, "Opening BIS Partition Storage with id={:08X}", id); if (bis_factory == nullptr) { return FileSys::ERROR_ENTITY_NOT_FOUND; diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h index 6dbbf0b2b..7102d3f9a 100644 --- a/src/core/hle/service/filesystem/filesystem.h +++ b/src/core/hle/service/filesystem/filesystem.h @@ -66,6 +66,10 @@ public: void SetPackedUpdate(FileSys::VirtualFile update_raw); ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess() const; + ResultVal<FileSys::VirtualFile> OpenPatchedRomFS(u64 title_id, + FileSys::ContentRecordType type) const; + ResultVal<FileSys::VirtualFile> OpenPatchedRomFSWithProgramIndex( + u64 
title_id, u8 program_index, FileSys::ContentRecordType type) const; ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id, FileSys::ContentRecordType type) const; ResultVal<FileSys::VirtualDir> CreateSaveData( diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index b3480494c..9cc260515 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -413,7 +413,7 @@ public: const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>()); - LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode)); + LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, mode); auto result = backend.OpenFile(name, mode); if (result.Failed()) { @@ -553,8 +553,7 @@ private: const auto save_root = fsc.OpenSaveDataSpace(space); if (save_root.Failed() || *save_root == nullptr) { - LOG_ERROR(Service_FS, "The save root for the space_id={:02X} was invalid!", - static_cast<u8>(space)); + LOG_ERROR(Service_FS, "The save root for the space_id={:02X} was invalid!", space); return; } @@ -718,7 +717,7 @@ FSP_SRV::FSP_SRV(Core::System& system_) {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"}, {204, nullptr, "OpenDataFileSystemByProgramIndex"}, - {205, nullptr, "OpenDataStorageByProgramIndex"}, + {205, &FSP_SRV::OpenDataStorageWithProgramIndex, "OpenDataStorageWithProgramIndex"}, {400, nullptr, "OpenDeviceOperator"}, {500, nullptr, "OpenSdCardDetectionEventNotifier"}, {501, nullptr, "OpenGameCardDetectionEventNotifier"}, @@ -795,8 +794,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) { const auto type = rp.PopRaw<FileSystemType>(); const auto title_id = rp.PopRaw<u64>(); - LOG_WARNING(Service_FS, "(STUBBED) called with type={}, title_id={:016X}", - static_cast<u8>(type), title_id); + LOG_WARNING(Service_FS, 
"(STUBBED) called with type={}, title_id={:016X}", type, title_id); IPC::ResponseBuilder rb{ctx, 2, 0, 0}; rb.Push(RESULT_UNKNOWN); @@ -883,7 +881,7 @@ void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) { void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto space = rp.PopRaw<FileSys::SaveDataSpaceId>(); - LOG_INFO(Service_FS, "called, space={}", static_cast<u8>(space)); + LOG_INFO(Service_FS, "called, space={}", space); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); @@ -915,10 +913,10 @@ void FSP_SRV::ReadSaveDataFileSystemExtraDataWithMaskBySaveDataAttribute( "(STUBBED) called, flags={}, space_id={}, attribute.title_id={:016X}\n" "attribute.user_id={:016X}{:016X}, attribute.save_id={:016X}\n" "attribute.type={}, attribute.rank={}, attribute.index={}", - flags, static_cast<u32>(parameters.space_id), parameters.attribute.title_id, + flags, parameters.space_id, parameters.attribute.title_id, parameters.attribute.user_id[1], parameters.attribute.user_id[0], - parameters.attribute.save_id, static_cast<u32>(parameters.attribute.type), - static_cast<u32>(parameters.attribute.rank), parameters.attribute.index); + parameters.attribute.save_id, parameters.attribute.type, parameters.attribute.rank, + parameters.attribute.index); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); @@ -951,7 +949,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) { const auto title_id = rp.PopRaw<u64>(); LOG_DEBUG(Service_FS, "called with storage_id={:02X}, unknown={:08X}, title_id={:016X}", - static_cast<u8>(storage_id), unknown, title_id); + storage_id, unknown, title_id); auto data = fsc.OpenRomFS(title_id, storage_id, FileSys::ContentRecordType::Data); @@ -968,7 +966,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) { // TODO(DarkLordZach): Find the right error code to use here LOG_ERROR(Service_FS, "could not open data 
storage with title_id={:016X}, storage_id={:02X}", title_id, - static_cast<u8>(storage_id)); + storage_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_UNKNOWN); return; @@ -987,21 +985,46 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) { void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - auto storage_id = rp.PopRaw<FileSys::StorageId>(); - auto title_id = rp.PopRaw<u64>(); + const auto storage_id = rp.PopRaw<FileSys::StorageId>(); + const auto title_id = rp.PopRaw<u64>(); - LOG_DEBUG(Service_FS, "called with storage_id={:02X}, title_id={:016X}", - static_cast<u8>(storage_id), title_id); + LOG_DEBUG(Service_FS, "called with storage_id={:02X}, title_id={:016X}", storage_id, title_id); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); } +void FSP_SRV::OpenDataStorageWithProgramIndex(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const auto program_index = rp.PopRaw<u8>(); + + LOG_DEBUG(Service_FS, "called, program_index={}", program_index); + + auto romfs = fsc.OpenPatchedRomFSWithProgramIndex( + system.CurrentProcess()->GetTitleID(), program_index, FileSys::ContentRecordType::Program); + + if (romfs.Failed()) { + // TODO: Find the right error code to use here + LOG_ERROR(Service_FS, "could not open storage with program_index={}", program_index); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_UNKNOWN); + return; + } + + auto storage = std::make_shared<IStorage>(system, std::move(romfs.Unwrap())); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IStorage>(std::move(storage)); +} + void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; log_mode = rp.PopEnum<LogMode>(); - LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode)); + LOG_DEBUG(Service_FS, "called, log_mode={:08X}", log_mode); IPC::ResponseBuilder rb{ctx, 2}; 
rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index 472286d6e..8ed933279 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h @@ -49,6 +49,7 @@ private: void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); + void OpenDataStorageWithProgramIndex(Kernel::HLERequestContext& ctx); void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 40a289594..c5b053c31 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -229,8 +229,7 @@ private: break; default: // HOS seems not have an error case for an unknown notification - LOG_WARNING(Service_ACC, "Unknown notification {:08X}", - static_cast<u32>(notification.notification_type)); + LOG_WARNING(Service_ACC, "Unknown notification {:08X}", notification.notification_type); break; } diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 66c4fe60a..d280e7caf 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -116,6 +116,31 @@ u32 Controller_NPad::IndexToNPad(std::size_t index) { } } +bool Controller_NPad::IsNpadIdValid(u32 npad_id) { + switch (npad_id) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case NPAD_UNKNOWN: + case NPAD_HANDHELD: + return true; + default: + LOG_ERROR(Service_HID, "Invalid npad id {}", npad_id); + return false; + } +} + +bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) { + return 
IsNpadIdValid(device_handle.npad_id) && + device_handle.npad_type < NpadType::MaxNpadType && + device_handle.device_index < DeviceIndex::MaxDeviceIndex; +} + Controller_NPad::Controller_NPad(Core::System& system) : ControllerBase(system), system(system) {} Controller_NPad::~Controller_NPad() { @@ -742,6 +767,10 @@ bool Controller_NPad::VibrateControllerAtIndex(std::size_t npad_index, std::size void Controller_NPad::VibrateController(const DeviceHandle& vibration_device_handle, const VibrationValue& vibration_value) { + if (!IsDeviceHandleValid(vibration_device_handle)) { + return; + } + if (!Settings::values.vibration_enabled.GetValue() && !permit_vibration_session_enabled) { return; } @@ -798,12 +827,20 @@ void Controller_NPad::VibrateControllers(const std::vector<DeviceHandle>& vibrat Controller_NPad::VibrationValue Controller_NPad::GetLastVibration( const DeviceHandle& vibration_device_handle) const { + if (!IsDeviceHandleValid(vibration_device_handle)) { + return {}; + } + const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index); return latest_vibration_values[npad_index][device_index]; } void Controller_NPad::InitializeVibrationDevice(const DeviceHandle& vibration_device_handle) { + if (!IsDeviceHandleValid(vibration_device_handle)) { + return; + } + const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); const auto device_index = static_cast<std::size_t>(vibration_device_handle.device_index); InitializeVibrationDeviceAtIndex(npad_index, device_index); @@ -824,6 +861,10 @@ void Controller_NPad::SetPermitVibrationSession(bool permit_vibration_session) { } bool Controller_NPad::IsVibrationDeviceMounted(const DeviceHandle& vibration_device_handle) const { + if (!IsDeviceHandleValid(vibration_device_handle)) { + return false; + } + const auto npad_index = NPadIdToIndex(vibration_device_handle.npad_id); const auto device_index = 
static_cast<std::size_t>(vibration_device_handle.device_index); return vibration_devices_mounted[npad_index][device_index]; @@ -1017,7 +1058,7 @@ void Controller_NPad::ClearAllControllers() { } u32 Controller_NPad::GetAndResetPressState() { - return std::exchange(press_state, 0); + return press_state.exchange(0); } bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 96f319294..e2e826623 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <atomic> #include "common/bit_field.h" #include "common/common_types.h" #include "core/frontend/input.h" @@ -56,12 +57,14 @@ public: JoyconLeft = 6, JoyconRight = 7, Pokeball = 9, + MaxNpadType = 10, }; enum class DeviceIndex : u8 { Left = 0, Right = 1, None = 2, + MaxDeviceIndex = 3, }; enum class GyroscopeZeroDriftMode : u32 { @@ -213,6 +216,8 @@ public: static Settings::ControllerType MapNPadToSettingsType(Controller_NPad::NPadControllerType type); static std::size_t NPadIdToIndex(u32 npad_id); static u32 IndexToNPad(std::size_t index); + static bool IsNpadIdValid(u32 npad_id); + static bool IsDeviceHandleValid(const DeviceHandle& device_handle); private: struct CommonHeader { @@ -411,7 +416,7 @@ private: bool IsControllerSupported(NPadControllerType controller) const; void RequestPadStateUpdate(u32 npad_id); - u32 press_state{}; + std::atomic<u32> press_state{}; NpadStyleSet style{}; std::array<NPadEntry, 10> shared_memory_entries{}; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index b3c7234e1..8d95f74e6 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -78,11 +78,13 @@ IAppletResource::IAppletResource(Core::System& system_) pad_update_event = Core::Timing::CreateEvent( "HID::UpdatePadCallback", 
[this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { + const auto guard = LockService(); UpdateControllers(user_data, ns_late); }); motion_update_event = Core::Timing::CreateEvent( "HID::MotionPadCallback", [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { + const auto guard = LockService(); UpdateMotion(user_data, ns_late); }); diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp index f884b2735..8e49b068c 100644 --- a/src/core/hle/service/lm/lm.cpp +++ b/src/core/hle/service/lm/lm.cpp @@ -68,7 +68,7 @@ private: IPC::RequestParser rp{ctx}; const auto destination = rp.PopEnum<DestinationFlag>(); - LOG_DEBUG(Service_LM, "called, destination={:08X}", static_cast<u32>(destination)); + LOG_DEBUG(Service_LM, "called, destination={:08X}", destination); manager.SetDestination(destination); diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp index b8d627ca8..2dcda16f6 100644 --- a/src/core/hle/service/ncm/ncm.cpp +++ b/src/core/hle/service/ncm/ncm.cpp @@ -45,7 +45,7 @@ public: } private: - FileSys::StorageId storage; + [[maybe_unused]] FileSys::StorageId storage; }; class IRegisteredLocationResolver final : public ServiceFramework<IRegisteredLocationResolver> { diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp index d33b26129..d16223064 100644 --- a/src/core/hle/service/nim/nim.cpp +++ b/src/core/hle/service/nim/nim.cpp @@ -217,7 +217,7 @@ public: {1, nullptr, "RefreshDebugAvailability"}, {2, nullptr, "ClearDebugResponse"}, {3, nullptr, "RegisterDebugResponse"}, - {4, nullptr, "IsLargeResourceAvailable"}, + {4, &NIM_ECA::IsLargeResourceAvailable, "IsLargeResourceAvailable"}, }; // clang-format on @@ -231,6 +231,18 @@ private: rb.Push(RESULT_SUCCESS); rb.PushIpcInterface<IShopServiceAccessServer>(system); } + + void IsLargeResourceAvailable(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const auto unknown{rp.Pop<u64>()}; + + 
LOG_INFO(Service_NIM, "(STUBBED) called, unknown={}", unknown); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(false); + } }; class NIM_SHP final : public ServiceFramework<NIM_SHP> { diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp index ef7584641..6ccf8995c 100644 --- a/src/core/hle/service/ns/ns.cpp +++ b/src/core/hle/service/ns/ns.cpp @@ -673,7 +673,7 @@ public: explicit NS_VM(Core::System& system_) : ServiceFramework{system_, "ns:vm"} { // clang-format off static const FunctionInfo functions[] = { - {1200, nullptr, "NeedsUpdateVulnerability"}, + {1200, &NS_VM::NeedsUpdateVulnerability, "NeedsUpdateVulnerability"}, {1201, nullptr, "UpdateSafeSystemVersionForDebug"}, {1202, nullptr, "GetSafeSystemVersion"}, }; @@ -681,6 +681,15 @@ public: RegisterHandlers(functions); } + +private: + void NeedsUpdateVulnerability(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_NS, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(false); + } }; void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index ccc137e40..71c7587db 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp @@ -27,29 +27,11 @@ namespace Service::NS { -enum class FontArchives : u64 { - Extension = 0x0100000000000810, - Standard = 0x0100000000000811, - Korean = 0x0100000000000812, - ChineseTraditional = 0x0100000000000813, - ChineseSimple = 0x0100000000000814, -}; - struct FontRegion { u32 offset; u32 size; }; -constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{ - std::make_pair(FontArchives::Standard, "nintendo_udsg-r_std_003.bfttf"), - std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_org_zh-cn_003.bfttf"), - std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_ext_zh-cn_003.bfttf"), - 
std::make_pair(FontArchives::ChineseTraditional, "nintendo_udjxh-db_zh-tw_003.bfttf"), - std::make_pair(FontArchives::Korean, "nintendo_udsg-r_ko_003.bfttf"), - std::make_pair(FontArchives::Extension, "nintendo_ext_003.bfttf"), - std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf"), -}; - // The below data is specific to shared font data dumped from Switch on f/w 2.2 // Virtual address and offsets/sizes likely will vary by dump [[maybe_unused]] constexpr VAddr SHARED_FONT_MEM_VADDR{0x00000009d3016000ULL}; @@ -80,6 +62,18 @@ static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMem offset += transformed_font.size() * sizeof(u32); } +void DecryptSharedFontToTTF(const std::vector<u32>& input, std::vector<u8>& output) { + ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number"); + + const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor + std::vector<u32> transformed_font(input.size()); + // TODO(ogniK): Figure out a better way to do this + std::transform(input.begin(), input.end(), transformed_font.begin(), + [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); + transformed_font[1] = Common::swap32(transformed_font[1]) ^ KEY; // "re-encrypt" the size + std::memcpy(output.data(), transformed_font.data() + 2, transformed_font.size() * sizeof(u32)); +} + void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, std::size_t& offset) { ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, @@ -182,21 +176,18 @@ PL_U::PL_U(Core::System& system_) } if (!romfs) { - LOG_ERROR(Service_NS, "Failed to find or synthesize {:016X}! Skipping", - static_cast<u64>(font.first)); + LOG_ERROR(Service_NS, "Failed to find or synthesize {:016X}! Skipping", font.first); continue; } const auto extracted_romfs = FileSys::ExtractRomFS(romfs); if (!extracted_romfs) { - LOG_ERROR(Service_NS, "Failed to extract RomFS for {:016X}! 
Skipping", - static_cast<u64>(font.first)); + LOG_ERROR(Service_NS, "Failed to extract RomFS for {:016X}! Skipping", font.first); continue; } const auto font_fp = extracted_romfs->GetFile(font.second); if (!font_fp) { - LOG_ERROR(Service_NS, "{:016X} has no file \"{}\"! Skipping", - static_cast<u64>(font.first), font.second); + LOG_ERROR(Service_NS, "{:016X} has no file \"{}\"! Skipping", font.first, font.second); continue; } std::vector<u32> font_data_u32(font_fp->GetSize() / sizeof(u32)); diff --git a/src/core/hle/service/ns/pl_u.h b/src/core/hle/service/ns/pl_u.h index 224dcb997..f920c7f69 100644 --- a/src/core/hle/service/ns/pl_u.h +++ b/src/core/hle/service/ns/pl_u.h @@ -16,6 +16,25 @@ class FileSystemController; namespace NS { +enum class FontArchives : u64 { + Extension = 0x0100000000000810, + Standard = 0x0100000000000811, + Korean = 0x0100000000000812, + ChineseTraditional = 0x0100000000000813, + ChineseSimple = 0x0100000000000814, +}; + +constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{ + std::make_pair(FontArchives::Standard, "nintendo_udsg-r_std_003.bfttf"), + std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_org_zh-cn_003.bfttf"), + std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_ext_zh-cn_003.bfttf"), + std::make_pair(FontArchives::ChineseTraditional, "nintendo_udjxh-db_zh-tw_003.bfttf"), + std::make_pair(FontArchives::Korean, "nintendo_udsg-r_ko_003.bfttf"), + std::make_pair(FontArchives::Extension, "nintendo_ext_003.bfttf"), + std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf"), +}; + +void DecryptSharedFontToTTF(const std::vector<u32>& input, std::vector<u8>& output); void EncryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, std::size_t& offset); class PL_U final : public ServiceFramework<PL_U> { diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 44a8bc060..5681599ba 100644 --- 
a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -31,8 +31,8 @@ public: * @param output A buffer where the output data will be written to. * @returns The result code of the ioctl. */ - virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) = 0; + virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, + std::vector<u8>& output) = 0; /** * Handles an ioctl2 request. @@ -43,8 +43,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) = 0; + const std::vector<u8>& inline_input, std::vector<u8>& output) = 0; /** * Handles an ioctl3 request. @@ -55,7 +54,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0; + std::vector<u8>& inline_output) = 0; protected: Core::System& system; diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 170a7c9a0..ce615c758 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -18,21 +18,20 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} nvdisp_disp0 ::~nvdisp_disp0() = default; -NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, + std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input, - const 
std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index eb7575e40..55a33b7e4 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -20,13 +20,11 @@ public: explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvdisp_disp0() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; /// Performs a screen flip, drawing the buffer pointed to by the handle. 
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 4e0652c39..6b062e10e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -21,8 +21,8 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_ : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} nvhost_as_gpu::~nvhost_as_gpu() = default; -NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, + std::vector<u8>& output) { switch (command.group) { case 'A': switch (command.cmd) { @@ -55,14 +55,13 @@ NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std: } NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { switch (command.group) { case 'A': switch (command.cmd) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 2bd355af9..08035fa0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -30,13 +30,11 @@ public: explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_as_gpu() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& 
output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; private: class BufferMap final { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 92d31b620..fea3b7b9f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -20,8 +20,7 @@ nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} nvhost_ctrl::~nvhost_ctrl() = default; -NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -30,9 +29,9 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v case 0x1c: return IocCtrlClearEventWait(input, output); case 0x1d: - return IocCtrlEventWait(input, output, false, ctrl); + return IocCtrlEventWait(input, output, false); case 0x1e: - return IocCtrlEventWait(input, output, true, ctrl); + return IocCtrlEventWait(input, output, true); case 0x1f: return IocCtrlEventRegister(input, output); case 0x20: @@ -48,14 +47,13 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v } NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input, - 
const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_outpu) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -69,7 +67,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector } NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, - bool is_async, IoctlCtrl& ctrl) { + bool is_async) { IocCtrlEventWaitParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", @@ -141,12 +139,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector params.value |= event_id; event.event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); - if (!is_async && ctrl.fresh_call) { - ctrl.must_delay = true; - ctrl.timeout = params.timeout; - ctrl.event_id = event_id; - return NvResult::Timeout; - } std::memcpy(output.data(), ¶ms, sizeof(params)); return NvResult::Timeout; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 107168e21..c5aa1362a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -18,13 +18,11 @@ public: SyncpointManager& syncpoint_manager); ~nvhost_ctrl() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const 
std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; private: struct IocSyncptReadParams { @@ -123,8 +121,7 @@ private: static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size"); NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); - NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, - IoctlCtrl& ctrl); + NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 647f5907e..0320d3ae2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -16,7 +16,7 @@ nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {} nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, IoctlCtrl& ctrl) { + std::vector<u8>& output) { switch (command.group) { case 'G': switch (command.cmd) { @@ -48,15 +48,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, } NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const 
std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, std::vector<u8>& inline_output, - IoctlCtrl& ctrl) { + std::vector<u8>& output, std::vector<u8>& inline_output) { switch (command.group) { case 'G': switch (command.cmd) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index c2fffe734..137b88238 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -16,13 +16,11 @@ public: explicit nvhost_ctrl_gpu(Core::System& system); ~nvhost_ctrl_gpu() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; private: struct IoctlGpuCharacteristics { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index b0c2caba5..af8b3d9f1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -23,8 +23,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, nvhost_gpu::~nvhost_gpu() = default; -NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) 
{ +NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -76,8 +75,7 @@ NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve }; NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -91,7 +89,7 @@ NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, } NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index aa0048a9d..e0298b4fe 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -26,13 +26,11 @@ public: SyncpointManager& syncpoint_manager); ~nvhost_gpu() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; private: enum class CtxObjects : u32_le { diff --git 
a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index b8328c314..d8735491c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -15,8 +15,8 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_de : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} nvhost_nvdec::~nvhost_nvdec() = default; -NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, + std::vector<u8>& output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -58,14 +58,13 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std:: } NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 884ed6c5b..79b8b6de1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -14,13 +14,11 @@ public: explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_nvdec() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const 
std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index ab152bf0e..d9f95ba58 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -18,39 +18,6 @@ public: explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_nvdec_common() override; - /** - * Handles an ioctl1 request. - * @param command The ioctl command id. - * @param input A buffer containing the input data for the ioctl. - * @param output A buffer where the output data will be written to. - * @returns The result code of the ioctl. - */ - virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) = 0; - - /** - * Handles an ioctl2 request. - * @param command The ioctl command id. - * @param input A buffer containing the input data for the ioctl. - * @param inline_input A buffer containing the input data for the ioctl which has been inlined. - * @param output A buffer where the output data will be written to. - * @returns The result code of the ioctl. - */ - virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) = 0; - - /** - * Handles an ioctl3 request. - * @param command The ioctl command id. 
- * @param input A buffer containing the input data for the ioctl. - * @param output A buffer where the output data will be written to. - * @param inline_output A buffer where the inlined output data will be written to. - * @returns The result code of the ioctl. - */ - virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0; - protected: class BufferMap final { public: diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 6f4ab0ab3..2d06955c0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -13,8 +13,8 @@ namespace Service::Nvidia::Devices { nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} nvhost_nvjpg::~nvhost_nvjpg() = default; -NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, + std::vector<u8>& output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -33,14 +33,13 @@ NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std:: } NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h 
b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 6fb99d959..43948d18d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h @@ -16,13 +16,11 @@ public: explicit nvhost_nvjpg(Core::System& system); ~nvhost_nvjpg() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; private: struct IoctlSetNvmapFD { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 55a17f423..805fe86ae 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -15,8 +15,7 @@ nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) nvhost_vic::~nvhost_vic() = default; -NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -51,14 +50,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve } NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented 
ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index 7f4858cd4..b2e11f4d4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -14,12 +14,10 @@ public: explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_vic(); - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 910cfee51..4015a2740 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -19,8 +19,7 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) { nvmap::~nvmap() = default; -NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) { +NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { switch (command.group) { case 0x1: switch 
(command.cmd) { @@ -49,14 +48,13 @@ NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector< } NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index c0c2fa5eb..4484bd79f 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -19,13 +19,11 @@ public: explicit nvmap(Core::System& system); ~nvmap() override; - NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) override; + const std::vector<u8>& inline_input, std::vector<u8>& output) override; NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, - std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; + std::vector<u8>& inline_output) override; /// Returns the allocated address of an nvmap object given its handle. 
VAddr GetObjectAddress(u32 handle) const; diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index d72c531f6..cc23b001c 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -61,32 +61,9 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) { std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); const auto input_buffer = ctx.ReadBuffer(0); - IoctlCtrl ctrl{}; - - const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer, ctrl); - if (ctrl.must_delay) { - ctrl.fresh_call = false; - ctx.SleepClientThread( - "NVServices::DelayedResponse", ctrl.timeout, - [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, - Kernel::ThreadWakeupReason reason) { - IoctlCtrl ctrl2{ctrl}; - std::vector<u8> tmp_output = output_buffer; - const auto nv_result2 = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output, ctrl2); - - if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); - } - - IPC::ResponseBuilder rb{ctx_, 3}; - rb.Push(RESULT_SUCCESS); - rb.PushEnum(nv_result2); - }, - nvdrv->GetEventWriteable(ctrl.event_id)); - } else { - if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer); - } + const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); + if (command.is_out != 0) { + ctx.WriteBuffer(output_buffer); } IPC::ResponseBuilder rb{ctx, 3}; @@ -110,36 +87,8 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) { const auto input_inlined_buffer = ctx.ReadBuffer(1); std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); - IoctlCtrl ctrl{}; - const auto nv_result = - nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer, ctrl); - if (ctrl.must_delay) { - ctrl.fresh_call = false; - ctx.SleepClientThread( - "NVServices::DelayedResponse", ctrl.timeout, - [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, - Kernel::ThreadWakeupReason reason) { - IoctlCtrl 
ctrl2{ctrl}; - std::vector<u8> tmp_output = output_buffer; - const auto nv_result2 = nvdrv->Ioctl2(fd, command, input_buffer, - input_inlined_buffer, tmp_output, ctrl2); - - if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output); - } - - IPC::ResponseBuilder rb{ctx_, 3}; - rb.Push(RESULT_SUCCESS); - rb.PushEnum(nv_result2); - }, - nvdrv->GetEventWriteable(ctrl.event_id)); - } else { - if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer); - } - } - + nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); if (command.is_out != 0) { ctx.WriteBuffer(output_buffer); } @@ -165,36 +114,11 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) { std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); - IoctlCtrl ctrl{}; const auto nv_result = - nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline, ctrl); - if (ctrl.must_delay) { - ctrl.fresh_call = false; - ctx.SleepClientThread( - "NVServices::DelayedResponse", ctrl.timeout, - [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_, - Kernel::ThreadWakeupReason reason) { - IoctlCtrl ctrl2{ctrl}; - std::vector<u8> tmp_output = output_buffer; - std::vector<u8> tmp_output2 = output_buffer; - const auto nv_result2 = - nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output2, ctrl2); - - if (command.is_out != 0) { - ctx.WriteBuffer(tmp_output, 0); - ctx.WriteBuffer(tmp_output2, 1); - } - - IPC::ResponseBuilder rb{ctx_, 3}; - rb.Push(RESULT_SUCCESS); - rb.PushEnum(nv_result2); - }, - nvdrv->GetEventWriteable(ctrl.event_id)); - } else { - if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer, 0); - ctx.WriteBuffer(output_buffer_inline, 1); - } + nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline); + if (command.is_out != 0) { + ctx.WriteBuffer(output_buffer, 0); + ctx.WriteBuffer(output_buffer_inline, 1); } IPC::ResponseBuilder rb{ctx, 3}; diff 
--git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h index a3c4ecd85..3294bc0e7 100644 --- a/src/core/hle/service/nvdrv/nvdata.h +++ b/src/core/hle/service/nvdrv/nvdata.h @@ -97,15 +97,4 @@ union Ioctl { BitField<31, 1, u32> is_out; }; -struct IoctlCtrl { - // First call done to the servioce for services that call itself again after a call. - bool fresh_call{true}; - // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep - bool must_delay{}; - // Timeout for the delay - s64 timeout{}; - // NV Event Id - s32 event_id{-1}; -}; - } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 8e0c9f093..e03195afe 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -91,7 +91,7 @@ DeviceFD Module::Open(const std::string& device_name) { } NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, IoctlCtrl& ctrl) { + std::vector<u8>& output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -104,12 +104,11 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input return NvResult::NotImplemented; } - return itr->second->Ioctl1(command, input, output, ctrl); + return itr->second->Ioctl1(command, input, output); } NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, - IoctlCtrl& ctrl) { + const std::vector<u8>& inline_input, std::vector<u8>& output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -122,11 +121,11 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input return NvResult::NotImplemented; } - return itr->second->Ioctl2(command, input, inline_input, output, ctrl); + return 
itr->second->Ioctl2(command, input, inline_input, output); } NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl) { + std::vector<u8>& output, std::vector<u8>& inline_output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -139,7 +138,7 @@ NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input return NvResult::NotImplemented; } - return itr->second->Ioctl3(command, input, output, inline_output, ctrl); + return itr->second->Ioctl3(command, input, output, inline_output); } NvResult Module::Close(DeviceFD fd) { diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 5985d2179..144e657e5 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -119,13 +119,13 @@ public: /// Sends an ioctl command to the specified file descriptor. NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, IoctlCtrl& ctrl); + std::vector<u8>& output); NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - const std::vector<u8>& inline_input, std::vector<u8>& output, IoctlCtrl& ctrl); + const std::vector<u8>& inline_input, std::vector<u8>& output); NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, - std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl); + std::vector<u8>& output, std::vector<u8>& inline_output); /// Closes a device file descriptor and returns operation success. 
NvResult Close(DeviceFD fd); diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index b89a2d41b..c8c6a4d64 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -22,10 +22,16 @@ BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id) BufferQueue::~BufferQueue() = default; void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) { + ASSERT(slot < buffer_slots); LOG_WARNING(Service, "Adding graphics buffer {}", slot); - free_buffers.push_back(slot); - queue.push_back({ + { + std::unique_lock lock{queue_mutex}; + free_buffers.push_back(slot); + } + condition.notify_one(); + + buffers[slot] = { .slot = slot, .status = Buffer::Status::Free, .igbp_buffer = igbp_buffer, @@ -33,127 +39,139 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) .crop_rect = {}, .swap_interval = 0, .multi_fence = {}, - }); + }; buffer_wait_event.writable->Signal(); } std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, u32 height) { + // Wait for first request before trying to dequeue + { + std::unique_lock lock{queue_mutex}; + condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; }); + } - if (free_buffers.empty()) { + if (!is_connect) { + // Buffer was disconnected while the thread was blocked, this is most likely due to + // emulation being stopped return std::nullopt; } + std::unique_lock lock{queue_mutex}; + auto f_itr = free_buffers.begin(); - auto itr = queue.end(); + auto slot = buffers.size(); while (f_itr != free_buffers.end()) { - auto slot = *f_itr; - itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { - // Only consider free buffers. Buffers become free once again after they've been - // Acquired and Released by the compositor, see the NVFlinger::Compose method. 
- if (buffer.status != Buffer::Status::Free) { - return false; - } - - if (buffer.slot != slot) { - return false; - } - - // Make sure that the parameters match. - return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height; - }); - - if (itr != queue.end()) { + const Buffer& buffer = buffers[*f_itr]; + if (buffer.status == Buffer::Status::Free && buffer.igbp_buffer.width == width && + buffer.igbp_buffer.height == height) { + slot = *f_itr; free_buffers.erase(f_itr); break; } ++f_itr; } - - if (itr == queue.end()) { + if (slot == buffers.size()) { return std::nullopt; } - - itr->status = Buffer::Status::Dequeued; - return {{itr->slot, &itr->multi_fence}}; + buffers[slot].status = Buffer::Status::Dequeued; + return {{buffers[slot].slot, &buffers[slot].multi_fence}}; } const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { - auto itr = std::find_if(queue.begin(), queue.end(), - [&](const Buffer& buffer) { return buffer.slot == slot; }); - ASSERT(itr != queue.end()); - ASSERT(itr->status == Buffer::Status::Dequeued); - return itr->igbp_buffer; + ASSERT(slot < buffers.size()); + ASSERT(buffers[slot].status == Buffer::Status::Dequeued); + ASSERT(buffers[slot].slot == slot); + + return buffers[slot].igbp_buffer; } void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect, u32 swap_interval, Service::Nvidia::MultiFence& multi_fence) { - auto itr = std::find_if(queue.begin(), queue.end(), - [&](const Buffer& buffer) { return buffer.slot == slot; }); - ASSERT(itr != queue.end()); - ASSERT(itr->status == Buffer::Status::Dequeued); - itr->status = Buffer::Status::Queued; - itr->transform = transform; - itr->crop_rect = crop_rect; - itr->swap_interval = swap_interval; - itr->multi_fence = multi_fence; + ASSERT(slot < buffers.size()); + ASSERT(buffers[slot].status == Buffer::Status::Dequeued); + ASSERT(buffers[slot].slot == slot); + + buffers[slot].status = Buffer::Status::Queued; + 
buffers[slot].transform = transform; + buffers[slot].crop_rect = crop_rect; + buffers[slot].swap_interval = swap_interval; + buffers[slot].multi_fence = multi_fence; queue_sequence.push_back(slot); } void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence) { - const auto itr = std::find_if(queue.begin(), queue.end(), - [slot](const Buffer& buffer) { return buffer.slot == slot; }); - ASSERT(itr != queue.end()); - ASSERT(itr->status != Buffer::Status::Free); - itr->status = Buffer::Status::Free; - itr->multi_fence = multi_fence; - itr->swap_interval = 0; + ASSERT(slot < buffers.size()); + ASSERT(buffers[slot].status != Buffer::Status::Free); + ASSERT(buffers[slot].slot == slot); - free_buffers.push_back(slot); + buffers[slot].status = Buffer::Status::Free; + buffers[slot].multi_fence = multi_fence; + buffers[slot].swap_interval = 0; + + { + std::unique_lock lock{queue_mutex}; + free_buffers.push_back(slot); + } + condition.notify_one(); buffer_wait_event.writable->Signal(); } std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { - auto itr = queue.end(); + std::size_t buffer_slot = buffers.size(); // Iterate to find a queued buffer matching the requested slot. 
- while (itr == queue.end() && !queue_sequence.empty()) { - const u32 slot = queue_sequence.front(); - itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) { - return buffer.status == Buffer::Status::Queued && buffer.slot == slot; - }); + while (buffer_slot == buffers.size() && !queue_sequence.empty()) { + const auto slot = static_cast<std::size_t>(queue_sequence.front()); + ASSERT(slot < buffers.size()); + if (buffers[slot].status == Buffer::Status::Queued) { + ASSERT(buffers[slot].slot == slot); + buffer_slot = slot; + } queue_sequence.pop_front(); } - if (itr == queue.end()) { + if (buffer_slot == buffers.size()) { return std::nullopt; } - itr->status = Buffer::Status::Acquired; - return *itr; + buffers[buffer_slot].status = Buffer::Status::Acquired; + return {{buffers[buffer_slot]}}; } void BufferQueue::ReleaseBuffer(u32 slot) { - auto itr = std::find_if(queue.begin(), queue.end(), - [&](const Buffer& buffer) { return buffer.slot == slot; }); - ASSERT(itr != queue.end()); - ASSERT(itr->status == Buffer::Status::Acquired); - itr->status = Buffer::Status::Free; - free_buffers.push_back(slot); + ASSERT(slot < buffers.size()); + ASSERT(buffers[slot].status == Buffer::Status::Acquired); + ASSERT(buffers[slot].slot == slot); + + buffers[slot].status = Buffer::Status::Free; + { + std::unique_lock lock{queue_mutex}; + free_buffers.push_back(slot); + } + condition.notify_one(); buffer_wait_event.writable->Signal(); } -void BufferQueue::Disconnect() { - queue.clear(); +void BufferQueue::Connect() { queue_sequence.clear(); id = 1; layer_id = 1; + is_connect = true; +} + +void BufferQueue::Disconnect() { + buffers.fill({}); + queue_sequence.clear(); + buffer_wait_event.writable->Signal(); + is_connect = false; + condition.notify_one(); } u32 BufferQueue::Query(QueryType type) { - LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type)); + LOG_WARNING(Service, "(STUBBED) called type={}", type); switch (type) { case 
QueryType::NativeWindowFormat: diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index e7517c7e1..a2f60d9eb 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -4,7 +4,9 @@ #pragma once +#include <condition_variable> #include <list> +#include <mutex> #include <optional> #include <vector> @@ -21,6 +23,7 @@ class KernelCore; namespace Service::NVFlinger { +constexpr u32 buffer_slots = 0x40; struct IGBPBuffer { u32_le magic; u32_le width; @@ -98,6 +101,7 @@ public: void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence); std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); void ReleaseBuffer(u32 slot); + void Connect(); void Disconnect(); u32 Query(QueryType type); @@ -105,18 +109,28 @@ public: return id; } + bool IsConnected() const { + return is_connect; + } + std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const; std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const; private: - u32 id; - u64 layer_id; + BufferQueue(const BufferQueue&) = delete; + + u32 id{}; + u64 layer_id{}; + std::atomic_bool is_connect{}; std::list<u32> free_buffers; - std::vector<Buffer> queue; + std::array<Buffer, buffer_slots> buffers; std::list<u32> queue_sequence; Kernel::EventPair buffer_wait_event; + + std::mutex queue_mutex; + std::condition_variable condition; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 44aa2bdae..4b3581949 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -88,6 +88,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { } NVFlinger::~NVFlinger() { + for (auto& buffer_queue : buffer_queues) { + buffer_queue->Disconnect(); + } + if (system.IsMulticore()) { is_running = false; wait_event->Set(); @@ -104,6 +108,8 
@@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { } std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { + const auto guard = Lock(); + LOG_DEBUG(Service, "Opening \"{}\" display", name); // TODO(Subv): Currently we only support the Default display. @@ -121,6 +127,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { } std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { + const auto guard = Lock(); auto* const display = FindDisplay(display_id); if (display == nullptr) { @@ -129,18 +136,22 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { const u64 layer_id = next_layer_id++; const u32 buffer_queue_id = next_buffer_queue_id++; - buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); - display->CreateLayer(layer_id, buffer_queues.back()); + buffer_queues.emplace_back( + std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id)); + display->CreateLayer(layer_id, *buffer_queues.back()); return layer_id; } void NVFlinger::CloseLayer(u64 layer_id) { + const auto guard = Lock(); + for (auto& display : displays) { display.CloseLayer(layer_id); } } std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { + const auto guard = Lock(); const auto* const layer = FindLayer(display_id, layer_id); if (layer == nullptr) { @@ -151,6 +162,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co } std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { + const auto guard = Lock(); auto* const display = FindDisplay(display_id); if (display == nullptr) { @@ -160,20 +172,16 @@ std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) return display->GetVSyncEvent(); } -BufferQueue& NVFlinger::FindBufferQueue(u32 id) { +BufferQueue* NVFlinger::FindBufferQueue(u32 id) { + const auto guard = Lock(); const auto itr = std::find_if(buffer_queues.begin(), 
buffer_queues.end(), - [id](const auto& queue) { return queue.GetId() == id; }); + [id](const auto& queue) { return queue->GetId() == id; }); - ASSERT(itr != buffer_queues.end()); - return *itr; -} - -const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const { - const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), - [id](const auto& queue) { return queue.GetId() == id; }); + if (itr == buffer_queues.end()) { + return nullptr; + } - ASSERT(itr != buffer_queues.end()); - return *itr; + return itr->get(); } VI::Display* NVFlinger::FindDisplay(u64 display_id) { diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 1ebe949c0..c6765259f 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -75,10 +75,7 @@ public: [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; /// Obtains a buffer queue identified by the ID. - [[nodiscard]] BufferQueue& FindBufferQueue(u32 id); - - /// Obtains a buffer queue identified by the ID. - [[nodiscard]] const BufferQueue& FindBufferQueue(u32 id) const; + [[nodiscard]] BufferQueue* FindBufferQueue(u32 id); /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when /// finished. @@ -86,11 +83,11 @@ public: [[nodiscard]] s64 GetNextTicks() const; +private: [[nodiscard]] std::unique_lock<std::mutex> Lock() const { return std::unique_lock{*guard}; } -private: /// Finds the display identified by the specified ID. [[nodiscard]] VI::Display* FindDisplay(u64 display_id); @@ -110,7 +107,7 @@ private: std::shared_ptr<Nvidia::Module> nvdrv; std::vector<VI::Display> displays; - std::vector<BufferQueue> buffer_queues; + std::vector<std::unique_ptr<BufferQueue>> buffer_queues; /// Id to use for the next layer that is created, this counter is shared among all displays. 
u64 next_layer_id = 1; diff --git a/src/core/hle/service/pcie/pcie.cpp b/src/core/hle/service/pcie/pcie.cpp index 80c0fc7ac..f6686fc4d 100644 --- a/src/core/hle/service/pcie/pcie.cpp +++ b/src/core/hle/service/pcie/pcie.cpp @@ -48,7 +48,7 @@ public: class PCIe final : public ServiceFramework<PCIe> { public: - explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} { + explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "RegisterClassDriver"}, diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 392fda73e..b417624c9 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -65,7 +65,7 @@ private: } LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}", - static_cast<u8>(Type), process_id, data[0].size()); + Type, process_id, data[0].size()); const auto& reporter{system.GetReporter()}; reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id); @@ -92,7 +92,7 @@ private: LOG_DEBUG( Service_PREPO, "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}", - static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size()); + Type, user_id[1], user_id[0], process_id, data[0].size()); const auto& reporter{system.GetReporter()}; reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id, diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 360e0bf37..ff2a5b1db 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -95,9 +95,14 @@ ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* se : system{system_}, service_name{service_name_}, max_sessions{max_sessions_}, handler_invoker{handler_invoker_} {} -ServiceFrameworkBase::~ServiceFrameworkBase() = default; 
+ServiceFrameworkBase::~ServiceFrameworkBase() { + // Wait for other threads to release access before destroying + const auto guard = LockService(); +} void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { + const auto guard = LockService(); + ASSERT(!port_installed); auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); @@ -106,6 +111,8 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) } void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { + const auto guard = LockService(); + ASSERT(!port_installed); auto [server_port, client_port] = @@ -115,17 +122,6 @@ void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { port_installed = true; } -std::shared_ptr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort(Kernel::KernelCore& kernel) { - ASSERT(!port_installed); - - auto [server_port, client_port] = - Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name); - auto port = MakeResult(std::move(server_port)).Unwrap(); - port->SetHleHandler(shared_from_this()); - port_installed = true; - return client_port; -} - void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) { handlers.reserve(handlers.size() + n); for (std::size_t i = 0; i < n; ++i) { @@ -164,6 +160,8 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) { } ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) { + const auto guard = LockService(); + switch (context.GetCommandType()) { case IPC::CommandType::Close: { IPC::ResponseBuilder rb{context, 2}; @@ -181,10 +179,14 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co break; } default: - UNIMPLEMENTED_MSG("command_type={}", static_cast<int>(context.GetCommandType())); + UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType()); } - 
context.WriteToOutgoingCommandBuffer(context.GetThread()); + // If emulation was shutdown, we are closing service threads, do not write the response back to + // memory that may be shutting down as well. + if (system.IsPoweredOn()) { + context.WriteToOutgoingCommandBuffer(context.GetThread()); + } return RESULT_SUCCESS; } diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 62a182310..916445517 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h @@ -5,9 +5,11 @@ #pragma once #include <cstddef> +#include <mutex> #include <string> #include <boost/container/flat_map.hpp> #include "common/common_types.h" +#include "common/spin_lock.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/object.h" @@ -68,11 +70,9 @@ public: void InstallAsService(SM::ServiceManager& service_manager); /// Creates a port pair and registers it on the kernel's global port registry. void InstallAsNamedPort(Kernel::KernelCore& kernel); - /// Creates and returns an unregistered port for the service. - std::shared_ptr<Kernel::ClientPort> CreatePort(Kernel::KernelCore& kernel); - + /// Invokes a service request routine. void InvokeRequest(Kernel::HLERequestContext& ctx); - + /// Handles a synchronization request for the service. ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override; protected: @@ -80,6 +80,11 @@ protected: template <typename Self> using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&); + /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread. + [[nodiscard]] std::scoped_lock<Common::SpinLock> LockService() { + return std::scoped_lock{lock_service}; + } + /// System context that the service operates under. Core::System& system; @@ -115,6 +120,9 @@ private: /// Function used to safely up-cast pointers to the derived class before invoking a handler. 
InvokerFn* handler_invoker; boost::container::flat_map<u32, FunctionInfoBase> handlers; + + /// Used to gain exclusive access to the service members, e.g. from CoreTiming thread. + Common::SpinLock lock_service; }; /** diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index 19b8f113d..b58b2c8c5 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -34,9 +34,9 @@ void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionTy // consistence (currently reports as 5.1.0-0.0) const auto archive = FileSys::SystemArchive::SystemVersion(); - const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) { + const auto early_exit_failure = [&ctx](std::string_view desc, ResultCode code) { LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).", - desc.c_str()); + desc); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(code); }; diff --git a/src/core/hle/service/sockets/blocking_worker.h b/src/core/hle/service/sockets/blocking_worker.h deleted file mode 100644 index 2d53e52b6..000000000 --- a/src/core/hle/service/sockets/blocking_worker.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2020 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <atomic> -#include <memory> -#include <string> -#include <string_view> -#include <thread> -#include <variant> -#include <vector> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/microprofile.h" -#include "common/thread.h" -#include "core/core.h" -#include "core/hle/kernel/hle_ipc.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/kernel/writable_event.h" - -namespace Service::Sockets { - -/** - * Worker abstraction to execute blocking calls on host without blocking the guest thread - * - * @tparam Service Service where the work is executed - * @tparam Types Types of work to execute - */ -template <class Service, class... Types> -class BlockingWorker { - using This = BlockingWorker<Service, Types...>; - using WorkVariant = std::variant<std::monostate, Types...>; - -public: - /// Create a new worker - static std::unique_ptr<This> Create(Core::System& system, Service* service, - std::string_view name) { - return std::unique_ptr<This>(new This(system, service, name)); - } - - ~BlockingWorker() { - while (!is_available.load(std::memory_order_relaxed)) { - // Busy wait until work is finished - std::this_thread::yield(); - } - // Monostate means to exit the thread - work = std::monostate{}; - work_event.Set(); - thread.join(); - } - - /** - * Try to capture the worker to send work after a success - * @returns True when the worker has been successfully captured - */ - bool TryCapture() { - bool expected = true; - return is_available.compare_exchange_weak(expected, false, std::memory_order_relaxed, - std::memory_order_relaxed); - } - - /** - * Send work to this worker abstraction - * @see TryCapture must be called before attempting to call this function - */ - template <class Work> - void SendWork(Work new_work) { - ASSERT_MSG(!is_available, "Trying to send work on a worker that's not captured"); - work = std::move(new_work); - work_event.Set(); - } - - /// Generate a 
callback for @see SleepClientThread - template <class Work> - auto Callback() { - return [this](std::shared_ptr<Kernel::Thread>, Kernel::HLERequestContext& ctx, - Kernel::ThreadWakeupReason reason) { - ASSERT(reason == Kernel::ThreadWakeupReason::Signal); - std::get<Work>(work).Response(ctx); - is_available.store(true); - }; - } - - /// Get kernel event that will be signalled by the worker when the host operation finishes - std::shared_ptr<Kernel::WritableEvent> KernelEvent() const { - return kernel_event; - } - -private: - explicit BlockingWorker(Core::System& system, Service* service, std::string_view name) { - auto pair = Kernel::WritableEvent::CreateEventPair(system.Kernel(), std::string(name)); - kernel_event = std::move(pair.writable); - thread = std::thread([this, &system, service, name] { Run(system, service, name); }); - } - - void Run(Core::System& system, Service* service, std::string_view name) { - system.RegisterHostThread(); - - const std::string thread_name = fmt::format("yuzu:{}", name); - MicroProfileOnThreadCreate(thread_name.c_str()); - Common::SetCurrentThreadName(thread_name.c_str()); - - bool keep_running = true; - while (keep_running) { - work_event.Wait(); - - const auto visit_fn = [service, &keep_running]<typename T>(T&& w) { - if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) { - keep_running = false; - } else { - w.Execute(service); - } - }; - std::visit(visit_fn, work); - - kernel_event->Signal(); - } - } - - std::thread thread; - WorkVariant work; - Common::Event work_event; - std::shared_ptr<Kernel::WritableEvent> kernel_event; - std::atomic_bool is_available{true}; -}; - -template <class Service, class... 
Types> -class BlockingWorkerPool { - using Worker = BlockingWorker<Service, Types...>; - -public: - explicit BlockingWorkerPool(Core::System& system_, Service* service_) - : system{system_}, service{service_} {} - - /// Returns a captured worker thread, creating new ones if necessary - Worker* CaptureWorker() { - for (auto& worker : workers) { - if (worker->TryCapture()) { - return worker.get(); - } - } - auto new_worker = Worker::Create(system, service, fmt::format("BSD:{}", workers.size())); - [[maybe_unused]] const bool success = new_worker->TryCapture(); - ASSERT(success); - - return workers.emplace_back(std::move(new_worker)).get(); - } - -private: - Core::System& system; - Service* const service; - - std::vector<std::unique_ptr<Worker>> workers; -}; - -} // namespace Service::Sockets diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index a9875b9a6..2b824059d 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -30,7 +30,7 @@ bool IsConnectionBased(Type type) { case Type::DGRAM: return false; default: - UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type)); + UNIMPLEMENTED_MSG("Unimplemented type={}", type); return false; } } @@ -178,13 +178,12 @@ void BSD::Poll(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout); - ExecuteWork(ctx, "BSD:Poll", timeout != 0, - PollWork{ - .nfds = nfds, - .timeout = timeout, - .read_buffer = ctx.ReadBuffer(), - .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), - }); + ExecuteWork(ctx, PollWork{ + .nfds = nfds, + .timeout = timeout, + .read_buffer = ctx.ReadBuffer(), + .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), + }); } void BSD::Accept(Kernel::HLERequestContext& ctx) { @@ -193,11 +192,10 @@ void BSD::Accept(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. 
fd={}", fd); - ExecuteWork(ctx, "BSD:Accept", IsBlockingSocket(fd), - AcceptWork{ - .fd = fd, - .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), - }); + ExecuteWork(ctx, AcceptWork{ + .fd = fd, + .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), + }); } void BSD::Bind(Kernel::HLERequestContext& ctx) { @@ -215,11 +213,10 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize()); - ExecuteWork(ctx, "BSD:Connect", IsBlockingSocket(fd), - ConnectWork{ - .fd = fd, - .addr = ctx.ReadBuffer(), - }); + ExecuteWork(ctx, ConnectWork{ + .fd = fd, + .addr = ctx.ReadBuffer(), + }); } void BSD::GetPeerName(Kernel::HLERequestContext& ctx) { @@ -327,12 +324,11 @@ void BSD::Recv(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize()); - ExecuteWork(ctx, "BSD:Recv", IsBlockingSocket(fd), - RecvWork{ - .fd = fd, - .flags = flags, - .message = std::vector<u8>(ctx.GetWriteBufferSize()), - }); + ExecuteWork(ctx, RecvWork{ + .fd = fd, + .flags = flags, + .message = std::vector<u8>(ctx.GetWriteBufferSize()), + }); } void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { @@ -344,13 +340,12 @@ void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags, ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1)); - ExecuteWork(ctx, "BSD:RecvFrom", IsBlockingSocket(fd), - RecvFromWork{ - .fd = fd, - .flags = flags, - .message = std::vector<u8>(ctx.GetWriteBufferSize(0)), - .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)), - }); + ExecuteWork(ctx, RecvFromWork{ + .fd = fd, + .flags = flags, + .message = std::vector<u8>(ctx.GetWriteBufferSize(0)), + .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)), + }); } void BSD::Send(Kernel::HLERequestContext& ctx) { @@ -361,12 +356,11 @@ void BSD::Send(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. 
fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize()); - ExecuteWork(ctx, "BSD:Send", IsBlockingSocket(fd), - SendWork{ - .fd = fd, - .flags = flags, - .message = ctx.ReadBuffer(), - }); + ExecuteWork(ctx, SendWork{ + .fd = fd, + .flags = flags, + .message = ctx.ReadBuffer(), + }); } void BSD::SendTo(Kernel::HLERequestContext& ctx) { @@ -377,13 +371,12 @@ void BSD::SendTo(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags, ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1)); - ExecuteWork(ctx, "BSD:SendTo", IsBlockingSocket(fd), - SendToWork{ - .fd = fd, - .flags = flags, - .message = ctx.ReadBuffer(0), - .addr = ctx.ReadBuffer(1), - }); + ExecuteWork(ctx, SendToWork{ + .fd = fd, + .flags = flags, + .message = ctx.ReadBuffer(0), + .addr = ctx.ReadBuffer(1), + }); } void BSD::Write(Kernel::HLERequestContext& ctx) { @@ -392,12 +385,11 @@ void BSD::Write(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize()); - ExecuteWork(ctx, "BSD:Write", IsBlockingSocket(fd), - SendWork{ - .fd = fd, - .flags = 0, - .message = ctx.ReadBuffer(), - }); + ExecuteWork(ctx, SendWork{ + .fd = fd, + .flags = 0, + .message = ctx.ReadBuffer(), + }); } void BSD::Close(Kernel::HLERequestContext& ctx) { @@ -410,24 +402,9 @@ void BSD::Close(Kernel::HLERequestContext& ctx) { } template <typename Work> -void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, - bool is_blocking, Work work) { - if (!is_blocking) { - work.Execute(this); - work.Response(ctx); - return; - } - - // Signal a dummy response to make IPC validation happy - // This will be overwritten by the SleepClientThread callback +void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, Work work) { + work.Execute(this); work.Response(ctx); - - auto worker = worker_pool.CaptureWorker(); - - ctx.SleepClientThread(std::string(sleep_reason), std::numeric_limits<u64>::max(), - 
worker->Callback<Work>(), worker->KernelEvent()); - - worker->SendWork(std::move(work)); } std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) { @@ -489,18 +466,18 @@ std::pair<s32, Errno> BSD::PollImpl(std::vector<u8>& write_buffer, std::vector<u } for (PollFD& pollfd : fds) { - ASSERT(pollfd.revents == 0); + ASSERT(False(pollfd.revents)); if (pollfd.fd > static_cast<s32>(MAX_FD) || pollfd.fd < 0) { LOG_ERROR(Service, "File descriptor handle={} is invalid", pollfd.fd); - pollfd.revents = 0; + pollfd.revents = PollEvents{}; return {0, Errno::SUCCESS}; } const std::optional<FileDescriptor>& descriptor = file_descriptors[pollfd.fd]; if (!descriptor) { LOG_ERROR(Service, "File descriptor handle={} is not allocated", pollfd.fd); - pollfd.revents = POLL_NVAL; + pollfd.revents = PollEvents::Nval; return {0, Errno::SUCCESS}; } } @@ -510,7 +487,7 @@ std::pair<s32, Errno> BSD::PollImpl(std::vector<u8>& write_buffer, std::vector<u Network::PollFD result; result.socket = file_descriptors[pollfd.fd]->socket.get(); result.events = TranslatePollEventsToHost(pollfd.events); - result.revents = 0; + result.revents = Network::PollEvents{}; return result; }); @@ -636,7 +613,7 @@ std::pair<s32, Errno> BSD::FcntlImpl(s32 fd, FcntlCmd cmd, s32 arg) { return {0, Errno::SUCCESS}; } default: - UNIMPLEMENTED_MSG("Unimplemented cmd={}", static_cast<int>(cmd)); + UNIMPLEMENTED_MSG("Unimplemented cmd={}", cmd); return {-1, Errno::SUCCESS}; } } @@ -679,7 +656,7 @@ Errno BSD::SetSockOptImpl(s32 fd, u32 level, OptName optname, size_t optlen, con case OptName::RCVTIMEO: return Translate(socket->SetRcvTimeo(value)); default: - UNIMPLEMENTED_MSG("Unimplemented optname={}", static_cast<int>(optname)); + UNIMPLEMENTED_MSG("Unimplemented optname={}", optname); return Errno::SUCCESS; } } @@ -807,18 +784,6 @@ bool BSD::IsFileDescriptorValid(s32 fd) const noexcept { return true; } -bool BSD::IsBlockingSocket(s32 fd) const noexcept { - // Inform invalid sockets as 
non-blocking - // This way we avoid using a worker thread as it will fail without blocking host - if (fd > static_cast<s32>(MAX_FD) || fd < 0) { - return false; - } - if (!file_descriptors[fd]) { - return false; - } - return (file_descriptors[fd]->flags & FLAG_O_NONBLOCK) != 0; -} - void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept { IPC::ResponseBuilder rb{ctx, 4}; @@ -827,8 +792,7 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co rb.PushEnum(bsd_errno); } -BSD::BSD(Core::System& system_, const char* name) - : ServiceFramework{system_, name}, worker_pool{system_, this} { +BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} { // clang-format off static const FunctionInfo functions[] = { {0, &BSD::RegisterClient, "RegisterClient"}, diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h index f14713fc4..6da0bfeb2 100644 --- a/src/core/hle/service/sockets/bsd.h +++ b/src/core/hle/service/sockets/bsd.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/service/service.h" -#include "core/hle/service/sockets/blocking_worker.h" #include "core/hle/service/sockets/sockets.h" namespace Core { @@ -138,8 +137,7 @@ private: void Close(Kernel::HLERequestContext& ctx); template <typename Work> - void ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, - bool is_blocking, Work work); + void ExecuteWork(Kernel::HLERequestContext& ctx, Work work); std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol); std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer, @@ -163,15 +161,10 @@ private: s32 FindFreeFileDescriptorHandle() noexcept; bool IsFileDescriptorValid(s32 fd) const noexcept; - bool IsBlockingSocket(s32 fd) const noexcept; void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const 
noexcept; std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors; - - BlockingWorkerPool<BSD, PollWork, AcceptWork, ConnectWork, RecvWork, RecvFromWork, SendWork, - SendToWork> - worker_pool; }; class BSDCFG final : public ServiceFramework<BSDCFG> { diff --git a/src/core/hle/service/sockets/sockets.h b/src/core/hle/service/sockets/sockets.h index 89a410076..5a65ed2a9 100644 --- a/src/core/hle/service/sockets/sockets.h +++ b/src/core/hle/service/sockets/sockets.h @@ -69,10 +69,22 @@ struct SockAddrIn { std::array<u8, 8> zeroes; }; +enum class PollEvents : u16 { + // Using Pascal case because IN is a macro on Windows. + In = 1 << 0, + Pri = 1 << 1, + Out = 1 << 2, + Err = 1 << 3, + Hup = 1 << 4, + Nval = 1 << 5, +}; + +DECLARE_ENUM_FLAG_OPERATORS(PollEvents); + struct PollFD { s32 fd; - u16 events; - u16 revents; + PollEvents events; + PollEvents revents; }; struct Linger { @@ -80,13 +92,6 @@ struct Linger { u32 linger; }; -constexpr u16 POLL_IN = 0x01; -constexpr u16 POLL_PRI = 0x02; -constexpr u16 POLL_OUT = 0x04; -constexpr u16 POLL_ERR = 0x08; -constexpr u16 POLL_HUP = 0x10; -constexpr u16 POLL_NVAL = 0x20; - constexpr u32 FLAG_MSG_DONTWAIT = 0x80; constexpr u32 FLAG_O_NONBLOCK = 0x800; diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp index 2e626fd86..c822d21b8 100644 --- a/src/core/hle/service/sockets/sockets_translate.cpp +++ b/src/core/hle/service/sockets/sockets_translate.cpp @@ -27,7 +27,7 @@ Errno Translate(Network::Errno value) { case Network::Errno::NOTCONN: return Errno::NOTCONN; default: - UNIMPLEMENTED_MSG("Unimplemented errno={}", static_cast<int>(value)); + UNIMPLEMENTED_MSG("Unimplemented errno={}", value); return Errno::SUCCESS; } } @@ -41,7 +41,7 @@ Network::Domain Translate(Domain domain) { case Domain::INET: return Network::Domain::INET; default: - UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); + UNIMPLEMENTED_MSG("Unimplemented domain={}", 
domain); return {}; } } @@ -51,7 +51,7 @@ Domain Translate(Network::Domain domain) { case Network::Domain::INET: return Domain::INET; default: - UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); + UNIMPLEMENTED_MSG("Unimplemented domain={}", domain); return {}; } } @@ -63,7 +63,7 @@ Network::Type Translate(Type type) { case Type::DGRAM: return Network::Type::DGRAM; default: - UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type)); + UNIMPLEMENTED_MSG("Unimplemented type={}", type); } } @@ -84,48 +84,48 @@ Network::Protocol Translate(Type type, Protocol protocol) { case Protocol::UDP: return Network::Protocol::UDP; default: - UNIMPLEMENTED_MSG("Unimplemented protocol={}", static_cast<int>(protocol)); + UNIMPLEMENTED_MSG("Unimplemented protocol={}", protocol); return Network::Protocol::TCP; } } -u16 TranslatePollEventsToHost(u32 flags) { - u32 result = 0; - const auto translate = [&result, &flags](u32 from, u32 to) { - if ((flags & from) != 0) { +Network::PollEvents TranslatePollEventsToHost(PollEvents flags) { + Network::PollEvents result{}; + const auto translate = [&result, &flags](PollEvents from, Network::PollEvents to) { + if (True(flags & from)) { flags &= ~from; result |= to; } }; - translate(POLL_IN, Network::POLL_IN); - translate(POLL_PRI, Network::POLL_PRI); - translate(POLL_OUT, Network::POLL_OUT); - translate(POLL_ERR, Network::POLL_ERR); - translate(POLL_HUP, Network::POLL_HUP); - translate(POLL_NVAL, Network::POLL_NVAL); - - UNIMPLEMENTED_IF_MSG(flags != 0, "Unimplemented flags={}", flags); - return static_cast<u16>(result); + translate(PollEvents::In, Network::PollEvents::In); + translate(PollEvents::Pri, Network::PollEvents::Pri); + translate(PollEvents::Out, Network::PollEvents::Out); + translate(PollEvents::Err, Network::PollEvents::Err); + translate(PollEvents::Hup, Network::PollEvents::Hup); + translate(PollEvents::Nval, Network::PollEvents::Nval); + + UNIMPLEMENTED_IF_MSG((u16)flags != 0, "Unimplemented 
flags={}", (u16)flags); + return result; } -u16 TranslatePollEventsToGuest(u32 flags) { - u32 result = 0; - const auto translate = [&result, &flags](u32 from, u32 to) { - if ((flags & from) != 0) { +PollEvents TranslatePollEventsToGuest(Network::PollEvents flags) { + PollEvents result{}; + const auto translate = [&result, &flags](Network::PollEvents from, PollEvents to) { + if (True(flags & from)) { flags &= ~from; result |= to; } }; - translate(Network::POLL_IN, POLL_IN); - translate(Network::POLL_PRI, POLL_PRI); - translate(Network::POLL_OUT, POLL_OUT); - translate(Network::POLL_ERR, POLL_ERR); - translate(Network::POLL_HUP, POLL_HUP); - translate(Network::POLL_NVAL, POLL_NVAL); + translate(Network::PollEvents::In, PollEvents::In); + translate(Network::PollEvents::Pri, PollEvents::Pri); + translate(Network::PollEvents::Out, PollEvents::Out); + translate(Network::PollEvents::Err, PollEvents::Err); + translate(Network::PollEvents::Hup, PollEvents::Hup); + translate(Network::PollEvents::Nval, PollEvents::Nval); - UNIMPLEMENTED_IF_MSG(flags != 0, "Unimplemented flags={}", flags); - return static_cast<u16>(result); + UNIMPLEMENTED_IF_MSG((u16)flags != 0, "Unimplemented flags={}", (u16)flags); + return result; } Network::SockAddrIn Translate(SockAddrIn value) { @@ -157,7 +157,7 @@ Network::ShutdownHow Translate(ShutdownHow how) { case ShutdownHow::RDWR: return Network::ShutdownHow::RDWR; default: - UNIMPLEMENTED_MSG("Unimplemented how={}", static_cast<int>(how)); + UNIMPLEMENTED_MSG("Unimplemented how={}", how); return {}; } } diff --git a/src/core/hle/service/sockets/sockets_translate.h b/src/core/hle/service/sockets/sockets_translate.h index e498913d4..057d1ff22 100644 --- a/src/core/hle/service/sockets/sockets_translate.h +++ b/src/core/hle/service/sockets/sockets_translate.h @@ -31,10 +31,10 @@ Network::Type Translate(Type type); Network::Protocol Translate(Type type, Protocol protocol); /// Translate abstract poll event flags to guest poll event flags -u16 
TranslatePollEventsToHost(u32 flags); +Network::PollEvents TranslatePollEventsToHost(PollEvents flags); /// Translate guest poll event flags to abstract poll event flags -u16 TranslatePollEventsToGuest(u32 flags); +PollEvents TranslatePollEventsToGuest(Network::PollEvents flags); /// Translate guest socket address structure to abstract socket address structure Network::SockAddrIn Translate(SockAddrIn value); diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 7b7ac282d..abc753d5d 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -10,8 +10,8 @@ #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/service/time/interface.h" #include "core/hle/service/time/time.h" #include "core/hle/service/time/time_sharedmemory.h" diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index af5b8b0b9..968cd16b6 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -282,18 +282,24 @@ public: void DeserializeData() override { [[maybe_unused]] const std::u16string token = ReadInterfaceToken(); data = Read<Data>(); - buffer = Read<NVFlinger::IGBPBuffer>(); + if (data.contains_object != 0) { + buffer_container = Read<BufferContainer>(); + } } struct Data { u32_le slot; - INSERT_PADDING_WORDS(1); + u32_le contains_object; + }; + + struct BufferContainer { u32_le graphic_buffer_length; INSERT_PADDING_WORDS(1); + NVFlinger::IGBPBuffer buffer{}; }; - Data data; - NVFlinger::IGBPBuffer buffer; + Data data{}; + BufferContainer buffer_container{}; }; class IGBPSetPreallocatedBufferResponseParcel : public Parcel { @@ -528,10 +534,9 @@ private: const u32 flags = rp.Pop<u32>(); LOG_DEBUG(Service_VI, "called. 
id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, - static_cast<u32>(transaction), flags); + transaction, flags); - const auto guard = nv_flinger.Lock(); - auto& buffer_queue = nv_flinger.FindBufferQueue(id); + auto& buffer_queue = *nv_flinger.FindBufferQueue(id); switch (transaction) { case TransactionId::Connect: { @@ -541,13 +546,16 @@ private: Settings::values.resolution_factor.GetValue()), static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * Settings::values.resolution_factor.GetValue())}; + + buffer_queue.Connect(); + ctx.WriteBuffer(response.Serialize()); break; } case TransactionId::SetPreallocatedBuffer: { IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; - buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); + buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer_container.buffer); IGBPSetPreallocatedBufferResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); @@ -557,40 +565,25 @@ private: IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; const u32 width{request.data.width}; const u32 height{request.data.height}; - auto result = buffer_queue.DequeueBuffer(width, height); - - if (result) { - // Buffer is available - IGBPDequeueBufferResponseParcel response{result->first, *result->second}; - ctx.WriteBuffer(response.Serialize()); - } else { - // Wait the current thread until a buffer becomes available - ctx.SleepClientThread( - "IHOSBinderDriver::DequeueBuffer", UINT64_MAX, - [=, this](std::shared_ptr<Kernel::Thread> thread, - Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) { - // Repeat TransactParcel DequeueBuffer when a buffer is available - const auto guard = nv_flinger.Lock(); - auto& buffer_queue = nv_flinger.FindBufferQueue(id); - auto result = buffer_queue.DequeueBuffer(width, height); - ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); - - IGBPDequeueBufferResponseParcel response{result->first, *result->second}; - 
ctx.WriteBuffer(response.Serialize()); - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_SUCCESS); - }, - buffer_queue.GetWritableBufferWaitEvent()); - } + + do { + if (auto result = buffer_queue.DequeueBuffer(width, height); result) { + // Buffer is available + IGBPDequeueBufferResponseParcel response{result->first, *result->second}; + ctx.WriteBuffer(response.Serialize()); + break; + } + } while (buffer_queue.IsConnected()); + break; } case TransactionId::RequestBuffer: { IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; auto& buffer = buffer_queue.RequestBuffer(request.slot); - IGBPRequestBufferResponseParcel response{buffer}; ctx.WriteBuffer(response.Serialize()); + break; } case TransactionId::QueueBuffer: { @@ -676,7 +669,7 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); - const auto& buffer_queue = nv_flinger.FindBufferQueue(id); + const auto& buffer_queue = *nv_flinger.FindBufferQueue(id); // TODO(Subv): Find out what this actually is. IPC::ResponseBuilder rb{ctx, 2, 1}; @@ -1066,8 +1059,8 @@ private: const auto scaling_mode = rp.PopEnum<NintendoScaleMode>(); const u64 unknown = rp.Pop<u64>(); - LOG_DEBUG(Service_VI, "called. scaling_mode=0x{:08X}, unknown=0x{:016X}", - static_cast<u32>(scaling_mode), unknown); + LOG_DEBUG(Service_VI, "called. 
scaling_mode=0x{:08X}, unknown=0x{:016X}", scaling_mode, + unknown); IPC::ResponseBuilder rb{ctx, 2}; @@ -1210,7 +1203,7 @@ private: void ConvertScalingMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto mode = rp.PopEnum<NintendoScaleMode>(); - LOG_DEBUG(Service_VI, "called mode={}", static_cast<u32>(mode)); + LOG_DEBUG(Service_VI, "called mode={}", mode); const auto converted_mode = ConvertScalingModeImpl(mode); @@ -1230,8 +1223,8 @@ private: const auto height = rp.Pop<u64>(); LOG_DEBUG(Service_VI, "called width={}, height={}", width, height); - constexpr std::size_t base_size = 0x20000; - constexpr std::size_t alignment = 0x1000; + constexpr u64 base_size = 0x20000; + constexpr u64 alignment = 0x1000; const auto texture_size = width * height * 4; const auto out_size = (texture_size + base_size - 1) / base_size * base_size; @@ -1311,7 +1304,7 @@ void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx, Core::System& const auto policy = rp.PopEnum<Policy>(); if (!IsValidServiceAccess(permission, policy)) { - LOG_ERROR(Service_VI, "Permission denied for policy {}", static_cast<u32>(policy)); + LOG_ERROR(Service_VI, "Permission denied for policy {}", policy); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_PERMISSION_DENIED); return; diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h index 35d340317..3c968580f 100644 --- a/src/core/loader/deconstructed_rom_directory.h +++ b/src/core/loader/deconstructed_rom_directory.h @@ -32,7 +32,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index 3527933ad..2067932c7 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -21,7 +21,7 @@ public: /** * Returns 
the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/kip.h b/src/core/loader/kip.h index dee05a7b5..14a85e295 100644 --- a/src/core/loader/kip.h +++ b/src/core/loader/kip.h @@ -23,7 +23,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h index c2b7722b5..a5b5e2ae1 100644 --- a/src/core/loader/nax.h +++ b/src/core/loader/nax.h @@ -28,7 +28,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h index 711070294..918792800 100644 --- a/src/core/loader/nca.h +++ b/src/core/loader/nca.h @@ -28,7 +28,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index a2aab2ecc..a82b66221 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h @@ -32,7 +32,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/nso.h 
b/src/core/loader/nso.h index d331096ae..3af461b5f 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -75,7 +75,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h index f0518ac47..d48d87f2c 100644 --- a/src/core/loader/nsp.h +++ b/src/core/loader/nsp.h @@ -34,7 +34,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h index 764dc8328..9f0ceb5ef 100644 --- a/src/core/loader/xci.h +++ b/src/core/loader/xci.h @@ -34,7 +34,7 @@ public: /** * Returns the type of the file - * @param file std::shared_ptr<VfsFile> open file + * @param file open file * @return FileType found, or FileType::Error if this loader doesn't know it */ static FileType IdentifyType(const FileSys::VirtualFile& file); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index b88aa5c40..54a848936 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <cstring> +#include <mutex> #include <optional> #include <utility> @@ -497,7 +498,21 @@ struct Memory::Impl { return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); } + struct PageEntry { + u8* const pointer; + const Common::PageType attribute; + }; + + PageEntry SafePageEntry(std::size_t base) const { + std::lock_guard lock{rasterizer_cache_guard}; + return { + .pointer = current_page_table->pointers[base], + .attribute = current_page_table->attributes[base], + }; + } + void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool 
cached) { + std::lock_guard lock{rasterizer_cache_guard}; if (vaddr == 0) { return; } @@ -630,16 +645,22 @@ struct Memory::Impl { */ template <typename T> T Read(const VAddr vaddr) { - const u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - if (page_pointer != nullptr) { - // NOTE: Avoid adding any extra logic to this fast-path block + // Avoid adding any extra logic to this fast-path block + if (const u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { T value; - std::memcpy(&value, &page_pointer[vaddr], sizeof(T)); + std::memcpy(&value, &pointer[vaddr], sizeof(T)); return value; } - const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { + // Otherwise, we need to grab the page with a lock, in case it is currently being modified + const auto entry = SafePageEntry(vaddr >> PAGE_BITS); + if (entry.pointer) { + T value; + std::memcpy(&value, &entry.pointer[vaddr], sizeof(T)); + return value; + } + + switch (entry.attribute) { case Common::PageType::Unmapped: LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); return 0; @@ -667,20 +688,24 @@ struct Memory::Impl { * @tparam T The data type to write to memory. This type *must* be * trivially copyable, otherwise the behavior of this function * is undefined. - * - * @returns The instance of T write to the specified virtual address. 
*/ template <typename T> void Write(const VAddr vaddr, const T data) { - u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - if (page_pointer != nullptr) { - // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[vaddr], &data, sizeof(T)); + // Avoid adding any extra logic to this fast-path block + if (u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { + std::memcpy(&pointer[vaddr], &data, sizeof(T)); return; } - const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { + // Otherwise, we need to grab the page with a lock, in case it is currently being modified + const auto entry = SafePageEntry(vaddr >> PAGE_BITS); + if (entry.pointer) { + // Memory was mapped, we are done + std::memcpy(&entry.pointer[vaddr], &data, sizeof(T)); + return; + } + + switch (entry.attribute) { case Common::PageType::Unmapped: LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, static_cast<u32>(data), vaddr); @@ -758,6 +783,7 @@ struct Memory::Impl { return true; } + mutable std::mutex rasterizer_cache_guard; Common::PageTable* current_page_table = nullptr; Core::System& system; }; diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp index 5ef2e8511..681e93468 100644 --- a/src/core/network/network.cpp +++ b/src/core/network/network.cpp @@ -11,7 +11,7 @@ #ifdef _WIN32 #define _WINSOCK_DEPRECATED_NO_WARNINGS // gethostname #include <winsock2.h> -#elif __unix__ +#elif YUZU_UNIX #include <errno.h> #include <fcntl.h> #include <netdb.h> @@ -54,7 +54,7 @@ constexpr IPv4Address TranslateIPv4(in_addr addr) { sockaddr TranslateFromSockAddrIn(SockAddrIn input) { sockaddr_in result; -#ifdef __unix__ +#if YUZU_UNIX result.sin_len = sizeof(result); #endif @@ -63,7 +63,7 @@ sockaddr TranslateFromSockAddrIn(SockAddrIn input) { result.sin_family = AF_INET; break; default: - UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", 
static_cast<int>(input.family)); + UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", input.family); result.sin_family = AF_INET; break; } @@ -99,7 +99,7 @@ bool EnableNonBlock(SOCKET fd, bool enable) { return ioctlsocket(fd, FIONBIO, &value) != SOCKET_ERROR; } -#elif __unix__ // ^ _WIN32 v __unix__ +#elif YUZU_UNIX // ^ _WIN32 v YUZU_UNIX using SOCKET = int; using WSAPOLLFD = pollfd; @@ -133,7 +133,7 @@ sockaddr TranslateFromSockAddrIn(SockAddrIn input) { result.sin_family = AF_INET; break; default: - UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", static_cast<int>(input.family)); + UNIMPLEMENTED_MSG("Unhandled sockaddr family={}", input.family); result.sin_family = AF_INET; break; } @@ -186,7 +186,7 @@ int TranslateDomain(Domain domain) { case Domain::INET: return AF_INET; default: - UNIMPLEMENTED_MSG("Unimplemented domain={}", static_cast<int>(domain)); + UNIMPLEMENTED_MSG("Unimplemented domain={}", domain); return 0; } } @@ -198,7 +198,7 @@ int TranslateType(Type type) { case Type::DGRAM: return SOCK_DGRAM; default: - UNIMPLEMENTED_MSG("Unimplemented type={}", static_cast<int>(type)); + UNIMPLEMENTED_MSG("Unimplemented type={}", type); return 0; } } @@ -210,7 +210,7 @@ int TranslateProtocol(Protocol protocol) { case Protocol::UDP: return IPPROTO_UDP; default: - UNIMPLEMENTED_MSG("Unimplemented protocol={}", static_cast<int>(protocol)); + UNIMPLEMENTED_MSG("Unimplemented protocol={}", protocol); return 0; } } @@ -238,49 +238,49 @@ SockAddrIn TranslateToSockAddrIn(sockaddr input_) { return result; } -u16 TranslatePollEvents(u32 events) { - u32 result = 0; +short TranslatePollEvents(PollEvents events) { + short result = 0; - if ((events & POLL_IN) != 0) { - events &= ~POLL_IN; + if (True(events & PollEvents::In)) { + events &= ~PollEvents::In; result |= POLLIN; } - if ((events & POLL_PRI) != 0) { - events &= ~POLL_PRI; + if (True(events & PollEvents::Pri)) { + events &= ~PollEvents::Pri; #ifdef _WIN32 LOG_WARNING(Service, "Winsock doesn't support POLLPRI"); #else 
- result |= POLL_PRI; + result |= POLLPRI; #endif } - if ((events & POLL_OUT) != 0) { - events &= ~POLL_OUT; + if (True(events & PollEvents::Out)) { + events &= ~PollEvents::Out; result |= POLLOUT; } - UNIMPLEMENTED_IF_MSG(events != 0, "Unhandled guest events=0x{:x}", events); + UNIMPLEMENTED_IF_MSG((u16)events != 0, "Unhandled guest events=0x{:x}", (u16)events); - return static_cast<u16>(result); + return result; } -u16 TranslatePollRevents(u32 revents) { - u32 result = 0; - const auto translate = [&result, &revents](u32 host, u32 guest) { +PollEvents TranslatePollRevents(short revents) { + PollEvents result{}; + const auto translate = [&result, &revents](short host, PollEvents guest) { if ((revents & host) != 0) { - revents &= ~host; + revents &= static_cast<short>(~host); result |= guest; } }; - translate(POLLIN, POLL_IN); - translate(POLLPRI, POLL_PRI); - translate(POLLOUT, POLL_OUT); - translate(POLLERR, POLL_ERR); - translate(POLLHUP, POLL_HUP); + translate(POLLIN, PollEvents::In); + translate(POLLPRI, PollEvents::Pri); + translate(POLLOUT, PollEvents::Out); + translate(POLLERR, PollEvents::Err); + translate(POLLHUP, PollEvents::Hup); UNIMPLEMENTED_IF_MSG(revents != 0, "Unhandled host revents=0x{:x}", revents); - return static_cast<u16>(result); + return result; } template <typename T> @@ -350,7 +350,7 @@ std::pair<s32, Errno> Poll(std::vector<PollFD>& pollfds, s32 timeout) { } for (size_t i = 0; i < num; ++i) { - pollfds[i].revents = TranslatePollRevents(static_cast<u32>(host_pollfds[i].revents)); + pollfds[i].revents = TranslatePollRevents(host_pollfds[i].revents); } if (result > 0) { @@ -482,7 +482,7 @@ Errno Socket::Shutdown(ShutdownHow how) { host_how = SD_BOTH; break; default: - UNIMPLEMENTED_MSG("Unimplemented flag how={}", static_cast<int>(how)); + UNIMPLEMENTED_MSG("Unimplemented flag how={}", how); return Errno::SUCCESS; } if (shutdown(fd, host_how) != SOCKET_ERROR) { diff --git a/src/core/network/network.h b/src/core/network/network.h index 
0622e4593..76b2821f2 100644 --- a/src/core/network/network.h +++ b/src/core/network/network.h @@ -61,19 +61,25 @@ struct SockAddrIn { }; /// Cross-platform poll fd structure + +enum class PollEvents : u16 { + // Using Pascal case because IN is a macro on Windows. + In = 1 << 0, + Pri = 1 << 1, + Out = 1 << 2, + Err = 1 << 3, + Hup = 1 << 4, + Nval = 1 << 5, +}; + +DECLARE_ENUM_FLAG_OPERATORS(PollEvents); + struct PollFD { Socket* socket; - u16 events; - u16 revents; + PollEvents events; + PollEvents revents; }; -constexpr u16 POLL_IN = 1 << 0; -constexpr u16 POLL_PRI = 1 << 1; -constexpr u16 POLL_OUT = 1 << 2; -constexpr u16 POLL_ERR = 1 << 3; -constexpr u16 POLL_HUP = 1 << 4; -constexpr u16 POLL_NVAL = 1 << 5; - class NetworkInstance { public: explicit NetworkInstance(); diff --git a/src/core/network/sockets.h b/src/core/network/sockets.h index 7bdff0fe4..a44393325 100644 --- a/src/core/network/sockets.h +++ b/src/core/network/sockets.h @@ -9,7 +9,7 @@ #if defined(_WIN32) #include <winsock.h> -#elif !defined(__unix__) +#elif !YUZU_UNIX #error "Platform not implemented" #endif @@ -84,7 +84,7 @@ public: #if defined(_WIN32) SOCKET fd = INVALID_SOCKET; -#elif defined(__unix__) +#elif YUZU_UNIX int fd = -1; #endif }; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index e9997a263..39306509a 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -72,8 +72,6 @@ void LogSettings() { log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd); log_setting("DataStorage_NandDir", Common::FS::GetUserPath(Common::FS::UserPath::NANDDir)); log_setting("DataStorage_SdmcDir", Common::FS::GetUserPath(Common::FS::UserPath::SDMCDir)); - log_setting("Debugging_UseGdbstub", values.use_gdbstub); - log_setting("Debugging_GdbstubPort", values.gdbstub_port); log_setting("Debugging_ProgramArgs", values.program_args); log_setting("Services_BCATBackend", values.bcat_backend); log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local); @@ -150,9 +148,4 @@ void 
RestoreGlobalState(bool is_powered_on) { values.motion_enabled.SetGlobal(true); } -void Sanitize() { - values.use_asynchronous_gpu_emulation.SetValue( - values.use_asynchronous_gpu_emulation.GetValue() || values.use_multi_core.GetValue()); -} - } // namespace Settings diff --git a/src/core/settings.h b/src/core/settings.h index 8e076f7ef..0cd3c0c84 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -180,6 +180,8 @@ struct Values { std::string motion_device; std::string udp_input_servers; + bool emulate_analog_keyboard; + bool mouse_enabled; std::string mouse_device; MouseButtonsRaw mouse_buttons; @@ -255,7 +257,4 @@ void LogSettings(); // Restore the global state of all applicable settings in the Values struct void RestoreGlobalState(bool is_powered_on); -// Fixes settings that are known to cause issues with the emulator -void Sanitize(); - } // namespace Settings diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index d748c1c04..40b516f85 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp @@ -6,6 +6,7 @@ #include <cmath> #include <thread> #include "common/math_util.h" +#include "core/settings.h" #include "input_common/analog_from_button.h" namespace InputCommon { @@ -112,7 +113,26 @@ public: } std::tuple<float, float> GetStatus() const override { - return std::make_tuple(std::cos(angle) * amplitude, std::sin(angle) * amplitude); + if (Settings::values.emulate_analog_keyboard) { + return std::make_tuple(std::cos(angle) * amplitude, std::sin(angle) * amplitude); + } + constexpr float SQRT_HALF = 0.707106781f; + int x = 0, y = 0; + if (right->GetStatus()) { + ++x; + } + if (left->GetStatus()) { + --x; + } + if (up->GetStatus()) { + ++y; + } + if (down->GetStatus()) { + --y; + } + const float coef = modifier->GetStatus() ? modifier_scale : 1.0f; + return std::make_tuple(static_cast<float>(x) * coef * (y == 0 ? 
1.0f : SQRT_HALF), + static_cast<float>(y) * coef * (x == 0 ? 1.0f : SQRT_HALF)); } bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp index 4d1052414..9670bdeb2 100644 --- a/src/input_common/gcadapter/gc_poller.cpp +++ b/src/input_common/gcadapter/gc_poller.cpp @@ -139,10 +139,10 @@ void GCButtonFactory::EndConfiguration() { class GCAnalog final : public Input::AnalogDevice { public: - explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, - const GCAdapter::Adapter* adapter, float range_) - : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), - range(range_) {} + explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_, + float deadzone_, float range_, const GCAdapter::Adapter* adapter) + : port(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_), + deadzone(deadzone_), range(range_), gcadapter(adapter) {} float GetAxis(u32 axis) const { if (gcadapter->DeviceConnected(port)) { @@ -157,7 +157,12 @@ public: std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { float x = GetAxis(analog_axis_x); float y = GetAxis(analog_axis_y); - + if (invert_x) { + x = -x; + } + if (invert_y) { + y = -y; + } // Make sure the coordinates are in the unit circle, // otherwise normalize it. 
float r = x * x + y * y; @@ -200,9 +205,11 @@ private: const u32 port; const u32 axis_x; const u32 axis_y; + const bool invert_x; + const bool invert_y; const float deadzone; - const GCAdapter::Adapter* gcadapter; const float range; + const GCAdapter::Adapter* gcadapter; mutable std::mutex mutex; }; @@ -223,8 +230,13 @@ std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::Param const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); + const std::string invert_x_value = params.Get("invert_x", "+"); + const std::string invert_y_value = params.Get("invert_y", "+"); + const bool invert_x = invert_x_value == "-"; + const bool invert_y = invert_y_value == "-"; - return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get(), range); + return std::make_unique<GCAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range, + adapter.get()); } void GCAnalogFactory::BeginConfiguration() { @@ -282,6 +294,8 @@ Common::ParamPackage GCAnalogFactory::GetNextInput() { params.Set("port", controller_number); params.Set("axis_x", analog_x_axis); params.Set("axis_y", analog_y_axis); + params.Set("invert_x", "+"); + params.Set("invert_y", "+"); analog_x_axis = -1; analog_y_axis = -1; controller_number = -1; diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp index 7445ad3ad..508eb0c7d 100644 --- a/src/input_common/mouse/mouse_poller.cpp +++ b/src/input_common/mouse/mouse_poller.cpp @@ -62,10 +62,10 @@ void MouseButtonFactory::EndConfiguration() { class MouseAnalog final : public Input::AnalogDevice { public: - explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, float range_, - const MouseInput::Mouse* mouse_input_) - : button(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), range(range_), - 
mouse_input(mouse_input_) {} + explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_, + float deadzone_, float range_, const MouseInput::Mouse* mouse_input_) + : button(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_), + deadzone(deadzone_), range(range_), mouse_input(mouse_input_) {} float GetAxis(u32 axis) const { std::lock_guard lock{mutex}; @@ -77,6 +77,12 @@ public: std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { float x = GetAxis(analog_axis_x); float y = GetAxis(analog_axis_y); + if (invert_x) { + x = -x; + } + if (invert_y) { + y = -y; + } // Make sure the coordinates are in the unit circle, // otherwise normalize it. @@ -104,6 +110,8 @@ private: const u32 button; const u32 axis_x; const u32 axis_y; + const bool invert_x; + const bool invert_y; const float deadzone; const float range; const MouseInput::Mouse* mouse_input; @@ -128,8 +136,13 @@ std::unique_ptr<Input::AnalogDevice> MouseAnalogFactory::Create( const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); + const std::string invert_x_value = params.Get("invert_x", "+"); + const std::string invert_y_value = params.Get("invert_y", "+"); + const bool invert_x = invert_x_value == "-"; + const bool invert_y = invert_y_value == "-"; - return std::make_unique<MouseAnalog>(port, axis_x, axis_y, deadzone, range, mouse_input.get()); + return std::make_unique<MouseAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range, + mouse_input.get()); } void MouseAnalogFactory::BeginConfiguration() { @@ -153,6 +166,8 @@ Common::ParamPackage MouseAnalogFactory::GetNextInput() const { params.Set("port", static_cast<u16>(pad.button)); params.Set("axis_x", 0); params.Set("axis_y", 1); + params.Set("invert_x", "+"); + params.Set("invert_y", "+"); return params; } } 
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index d56b7587b..d32eb732a 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -352,13 +352,20 @@ private: class SDLAnalog final : public Input::AnalogDevice { public: explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, - float deadzone_, float range_) - : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), - range(range_) {} + bool invert_x_, bool invert_y_, float deadzone_, float range_) + : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), + invert_y(invert_y_), deadzone(deadzone_), range(range_) {} std::tuple<float, float> GetStatus() const override { - const auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); + auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); const float r = std::sqrt((x * x) + (y * y)); + if (invert_x) { + x = -x; + } + if (invert_y) { + y = -y; + } + if (r > deadzone) { return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone), y / r * (r - deadzone) / (1 - deadzone)); @@ -386,6 +393,8 @@ private: std::shared_ptr<SDLJoystick> joystick; const int axis_x; const int axis_y; + const bool invert_x; + const bool invert_y; const float deadzone; const float range; }; @@ -572,12 +581,17 @@ public: const int axis_y = params.Get("axis_y", 1); const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); + const std::string invert_x_value = params.Get("invert_x", "+"); + const std::string invert_y_value = params.Get("invert_y", "+"); + const bool invert_x = invert_x_value == "-"; + const bool invert_y = invert_y_value == "-"; auto joystick = state.GetSDLJoystickByGUID(guid, port); // This is necessary so accessing GetAxis with axis_x and axis_y won't crash joystick->SetAxis(axis_x, 0); joystick->SetAxis(axis_y, 0); - return 
std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone, range); + return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone, + range); } private: @@ -886,6 +900,8 @@ Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& gui params.Set("guid", guid); params.Set("axis_x", axis_x); params.Set("axis_y", axis_y); + params.Set("invert_x", "+"); + params.Set("invert_y", "+"); return params; } } // Anonymous namespace diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 17a9225d7..412d57896 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) { } void Client::OnPadData(Response::PadData data, std::size_t client) { + // Accept packets only for the correct pad + if (static_cast<u8>(clients[client].pad_index) != data.info.id) { + return; + } + LOG_TRACE(Input, "PadData packet received"); if (data.packet_counter == clients[client].packet_sequence) { LOG_WARNING( diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 47ef30aa9..d80b0b688 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -2,7 +2,6 @@ add_executable(tests common/bit_field.cpp common/bit_utils.cpp common/fibers.cpp - common/multi_level_queue.cpp common/param_package.cpp common/ring_buffer.cpp core/arm/arm_test_common.cpp diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp deleted file mode 100644 index cca7ec7da..000000000 --- a/src/tests/common/multi_level_queue.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 Yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <catch2/catch.hpp> -#include <math.h> -#include "common/common_types.h" -#include "common/multi_level_queue.h" - -namespace Common { - -TEST_CASE("MultiLevelQueue", "[common]") { - std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0}; - Common::MultiLevelQueue<f32, 64> mlq; - REQUIRE(mlq.empty()); - mlq.add(values[2], 2); - mlq.add(values[7], 7); - mlq.add(values[3], 3); - mlq.add(values[4], 4); - mlq.add(values[0], 0); - mlq.add(values[5], 5); - mlq.add(values[6], 6); - mlq.add(values[1], 1); - u32 index = 0; - bool all_set = true; - for (auto& f : mlq) { - all_set &= (f == values[index]); - index++; - } - REQUIRE(all_set); - REQUIRE(!mlq.empty()); - f32 v = 8.0; - mlq.add(v, 2); - v = -7.0; - mlq.add(v, 2, false); - REQUIRE(mlq.front(2) == -7.0); - mlq.yield(2); - REQUIRE(mlq.front(2) == values[2]); - REQUIRE(mlq.back(2) == -7.0); - REQUIRE(mlq.empty(8)); - v = 10.0; - mlq.add(v, 8); - mlq.adjust(v, 8, 9); - REQUIRE(mlq.front(9) == v); - REQUIRE(mlq.empty(8)); - REQUIRE(!mlq.empty(9)); - mlq.adjust(values[0], 0, 9); - REQUIRE(mlq.highest_priority_set() == 1); - REQUIRE(mlq.lowest_priority_set() == 9); - mlq.remove(values[1], 1); - REQUIRE(mlq.highest_priority_set() == 2); - REQUIRE(mlq.empty(1)); -} - -} // namespace Common diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index abcee2a1c..e050f9aed 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -25,6 +25,7 @@ add_library(video_core STATIC command_classes/vic.h compatible_formats.cpp compatible_formats.h + delayed_destruction_ring.h dirty_flags.cpp dirty_flags.h dma_pusher.cpp @@ -47,6 +48,7 @@ add_library(video_core STATIC engines/shader_bytecode.h engines/shader_header.h engines/shader_type.h + framebuffer_config.h macro/macro.cpp macro/macro.h macro/macro_hle.cpp @@ -58,10 +60,6 @@ add_library(video_core STATIC fence_manager.h gpu.cpp gpu.h - gpu_asynch.cpp - gpu_asynch.h - gpu_synch.cpp - gpu_synch.h gpu_thread.cpp 
gpu_thread.h guest_driver.cpp @@ -84,14 +82,10 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_framebuffer_cache.cpp - renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h - renderer_opengl/gl_sampler_cache.cpp - renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -113,10 +107,68 @@ add_library(video_core STATIC renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h - renderer_opengl/utils.cpp - renderer_opengl/utils.h - sampler_cache.cpp - sampler_cache.h + renderer_opengl/util_shaders.cpp + renderer_opengl/util_shaders.h + renderer_vulkan/blit_image.cpp + renderer_vulkan/blit_image.h + renderer_vulkan/fixed_pipeline_state.cpp + renderer_vulkan/fixed_pipeline_state.h + renderer_vulkan/maxwell_to_vk.cpp + renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/nsight_aftermath_tracker.cpp + renderer_vulkan/nsight_aftermath_tracker.h + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/vk_blit_screen.cpp + renderer_vulkan/vk_blit_screen.h + renderer_vulkan/vk_buffer_cache.cpp + renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_command_pool.cpp + renderer_vulkan/vk_command_pool.h + renderer_vulkan/vk_compute_pass.cpp + renderer_vulkan/vk_compute_pass.h + renderer_vulkan/vk_compute_pipeline.cpp + renderer_vulkan/vk_compute_pipeline.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h + renderer_vulkan/vk_device.cpp + renderer_vulkan/vk_device.h + renderer_vulkan/vk_fence_manager.cpp + renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h + 
renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h + renderer_vulkan/vk_memory_manager.cpp + renderer_vulkan/vk_memory_manager.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_query_cache.cpp + renderer_vulkan/vk_query_cache.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_shader_decompiler.cpp + renderer_vulkan/vk_shader_decompiler.h + renderer_vulkan/vk_shader_util.cpp + renderer_vulkan/vk_shader_util.h + renderer_vulkan/vk_staging_buffer_pool.cpp + renderer_vulkan/vk_staging_buffer_pool.h + renderer_vulkan/vk_state_tracker.cpp + renderer_vulkan/vk_state_tracker.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_cache.cpp + renderer_vulkan/vk_texture_cache.h + renderer_vulkan/vk_update_descriptor.cpp + renderer_vulkan/vk_update_descriptor.h + renderer_vulkan/wrapper.cpp + renderer_vulkan/wrapper.h shader_cache.h shader_notify.cpp shader_notify.h @@ -173,19 +225,32 @@ add_library(video_core STATIC shader/transform_feedback.h surface.cpp surface.h + texture_cache/accelerated_swizzle.cpp + texture_cache/accelerated_swizzle.h + texture_cache/decode_bc4.cpp + texture_cache/decode_bc4.h + texture_cache/descriptor_table.h + texture_cache/formatter.cpp + texture_cache/formatter.h texture_cache/format_lookup_table.cpp texture_cache/format_lookup_table.h - texture_cache/surface_base.cpp - texture_cache/surface_base.h - texture_cache/surface_params.cpp - texture_cache/surface_params.h - texture_cache/surface_view.cpp - texture_cache/surface_view.h + texture_cache/image_base.cpp + texture_cache/image_base.h + texture_cache/image_info.cpp + texture_cache/image_info.h + 
texture_cache/image_view_base.cpp + texture_cache/image_view_base.h + texture_cache/image_view_info.cpp + texture_cache/image_view_info.h + texture_cache/render_targets.h + texture_cache/samples_helper.h + texture_cache/slot_vector.h texture_cache/texture_cache.h + texture_cache/types.h + texture_cache/util.cpp + texture_cache/util.h textures/astc.cpp textures/astc.h - textures/convert.cpp - textures/convert.h textures/decoders.cpp textures/decoders.h textures/texture.cpp @@ -194,75 +259,6 @@ add_library(video_core STATIC video_core.h ) -if (ENABLE_VULKAN) - target_sources(video_core PRIVATE - renderer_vulkan/fixed_pipeline_state.cpp - renderer_vulkan/fixed_pipeline_state.h - renderer_vulkan/maxwell_to_vk.cpp - renderer_vulkan/maxwell_to_vk.h - renderer_vulkan/nsight_aftermath_tracker.cpp - renderer_vulkan/nsight_aftermath_tracker.h - renderer_vulkan/renderer_vulkan.h - renderer_vulkan/renderer_vulkan.cpp - renderer_vulkan/vk_blit_screen.cpp - renderer_vulkan/vk_blit_screen.h - renderer_vulkan/vk_buffer_cache.cpp - renderer_vulkan/vk_buffer_cache.h - renderer_vulkan/vk_command_pool.cpp - renderer_vulkan/vk_command_pool.h - renderer_vulkan/vk_compute_pass.cpp - renderer_vulkan/vk_compute_pass.h - renderer_vulkan/vk_compute_pipeline.cpp - renderer_vulkan/vk_compute_pipeline.h - renderer_vulkan/vk_descriptor_pool.cpp - renderer_vulkan/vk_descriptor_pool.h - renderer_vulkan/vk_device.cpp - renderer_vulkan/vk_device.h - renderer_vulkan/vk_fence_manager.cpp - renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h - renderer_vulkan/vk_image.cpp - renderer_vulkan/vk_image.h - renderer_vulkan/vk_master_semaphore.cpp - renderer_vulkan/vk_master_semaphore.h - renderer_vulkan/vk_memory_manager.cpp - renderer_vulkan/vk_memory_manager.h - renderer_vulkan/vk_pipeline_cache.cpp - renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_query_cache.cpp - renderer_vulkan/vk_query_cache.h - 
renderer_vulkan/vk_rasterizer.cpp - renderer_vulkan/vk_rasterizer.h - renderer_vulkan/vk_renderpass_cache.cpp - renderer_vulkan/vk_renderpass_cache.h - renderer_vulkan/vk_resource_pool.cpp - renderer_vulkan/vk_resource_pool.h - renderer_vulkan/vk_sampler_cache.cpp - renderer_vulkan/vk_sampler_cache.h - renderer_vulkan/vk_scheduler.cpp - renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h - renderer_vulkan/vk_shader_util.cpp - renderer_vulkan/vk_shader_util.h - renderer_vulkan/vk_staging_buffer_pool.cpp - renderer_vulkan/vk_staging_buffer_pool.h - renderer_vulkan/vk_state_tracker.cpp - renderer_vulkan/vk_state_tracker.h - renderer_vulkan/vk_stream_buffer.cpp - renderer_vulkan/vk_stream_buffer.h - renderer_vulkan/vk_swapchain.cpp - renderer_vulkan/vk_swapchain.h - renderer_vulkan/vk_texture_cache.cpp - renderer_vulkan/vk_texture_cache.h - renderer_vulkan/vk_update_descriptor.cpp - renderer_vulkan/vk_update_descriptor.h - renderer_vulkan/wrapper.cpp - renderer_vulkan/wrapper.h - ) -endif() - create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) @@ -278,12 +274,8 @@ endif() add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) - -if (ENABLE_VULKAN) - target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) - target_compile_definitions(video_core PRIVATE HAS_VULKAN) - target_link_libraries(video_core PRIVATE sirit) -endif() +target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) +target_link_libraries(video_core PRIVATE sirit) if (ENABLE_NSIGHT_AFTERMATH) if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) @@ -297,13 +289,21 @@ if (ENABLE_NSIGHT_AFTERMATH) endif() if (MSVC) - target_compile_options(video_core PRIVATE /we4267) + target_compile_options(video_core PRIVATE + /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + 
/we4456 # Declaration of 'identifier' hides previous local declaration + /we4457 # Declaration of 'identifier' hides function parameter + /we4458 # Declaration of 'identifier' hides class member + /we4459 # Declaration of 'identifier' hides global declaration + /we4715 # 'function' : not all control paths return a value + ) else() target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion -Werror=pessimizing-move -Werror=redundant-move + -Werror=shadow -Werror=switch -Werror=type-limits -Werror=unused-variable diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index e64170e66..e9306194a 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -4,34 +4,29 @@ #pragma once -#include <unordered_set> -#include <utility> - -#include "common/alignment.h" #include "common/common_types.h" -#include "video_core/gpu.h" namespace VideoCommon { class BufferBlock { public: - bool Overlaps(VAddr start, VAddr end) const { + [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const { return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(VAddr other_start, VAddr other_end) const { + [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const { return cpu_addr <= other_start && other_end <= cpu_addr_end; } - std::size_t Offset(VAddr in_addr) const { + [[nodiscard]] std::size_t Offset(VAddr in_addr) const { return static_cast<std::size_t>(in_addr - cpu_addr); } - VAddr CpuAddr() const { + [[nodiscard]] VAddr CpuAddr() const { return cpu_addr; } - VAddr CpuAddrEnd() const { + [[nodiscard]] VAddr CpuAddrEnd() const { return cpu_addr_end; } @@ -40,11 +35,11 @@ public: cpu_addr_end = new_addr + size; } - std::size_t Size() const { + [[nodiscard]] std::size_t Size() const { return size; } - u64 Epoch() const { + [[nodiscard]] u64 Epoch() const { return epoch; } diff --git a/src/video_core/buffer_cache/buffer_cache.h 
b/src/video_core/buffer_cache/buffer_cache.h index e7edd733f..83b9ee871 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -118,20 +118,17 @@ public: /// Prepares the buffer cache for data uploading /// @param max_size Maximum number of bytes that will be uploaded /// @return True when a stream buffer invalidation was required, false otherwise - bool Map(std::size_t max_size) { + void Map(std::size_t max_size) { std::lock_guard lock{mutex}; - bool invalidated; - std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); buffer_offset = buffer_offset_base; - - return invalidated; } /// Finishes the upload stream void Unmap() { std::lock_guard lock{mutex}; - stream_buffer->Unmap(buffer_offset - buffer_offset_base); + stream_buffer.Unmap(buffer_offset - buffer_offset_base); } /// Function called at the end of each frame, inteded for deferred operations @@ -261,9 +258,9 @@ public: protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - std::unique_ptr<StreamBuffer> stream_buffer_) + StreamBuffer& stream_buffer_) : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, - stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} + stream_buffer{stream_buffer_} {} ~BufferCache() = default; @@ -441,7 +438,7 @@ private: buffer_ptr += size; buffer_offset += size; - return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; + return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; } void AlignBuffer(std::size_t alignment) { @@ -545,7 +542,7 @@ private: bool IsRegionWritten(VAddr start, VAddr end) const { const u64 page_end = end >> WRITE_PAGE_BIT; for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= 
page_end; ++page_start) { - if (written_pages.count(page_start) > 0) { + if (written_pages.contains(page_start)) { return true; } } @@ -567,9 +564,7 @@ private: VideoCore::RasterizerInterface& rasterizer; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - - std::unique_ptr<StreamBuffer> stream_buffer; - BufferType stream_buffer_handle; + StreamBuffer& stream_buffer; u8* buffer_ptr = nullptr; u64 buffer_offset = 0; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index fe0bcd1d8..ef974b08a 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -84,9 +84,10 @@ private: void FillFreeList(Chunk& chunk); std::vector<MapInterval*> free_list; - std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; Chunk first_chunk; + + std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; }; } // namespace VideoCommon diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index b60f86260..e3e7432f7 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -29,8 +29,8 @@ #include "video_core/memory_manager.h" namespace Tegra { -CDmaPusher::CDmaPusher(GPU& gpu) - : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)), +CDmaPusher::CDmaPusher(GPU& gpu_) + : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), host1x_processor(std::make_unique<Host1x>(gpu)), nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), @@ -100,11 +100,11 @@ void CDmaPusher::Step() { } } -void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { +void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { switch (current_class) { case ChClassId::NvDec: - ThiStateWrite(nvdec_thi_state, offset, {data}); - switch (static_cast<ThiMethod>(offset)) { + ThiStateWrite(nvdec_thi_state, state_offset, {data}); + switch (static_cast<ThiMethod>(state_offset)) { case ThiMethod::IncSyncpt: { 
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); const auto syncpoint_id = static_cast<u32>(data & 0xFF); @@ -120,16 +120,16 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod( - static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data}); + nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), + {data}); break; default: break; } break; case ChClassId::GraphicsVic: - ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data}); - switch (static_cast<ThiMethod>(offset)) { + ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data}); + switch (static_cast<ThiMethod>(state_offset)) { case ThiMethod::IncSyncpt: { LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); const auto syncpoint_id = static_cast<u32>(data & 0xFF); @@ -145,8 +145,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", static_cast<u32>(vic_thi_state.method_0), data); - vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0), - {data}); + vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data}); break; default: break; @@ -155,7 +154,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ChClassId::Host1x: // This device is mainly for syncpoint synchronization LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); - host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data}); + host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data}); break; default: UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); @@ -163,9 +162,10 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { } } -void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 
offset, const std::vector<u32>& arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset; - std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, + const std::vector<u32>& arguments) { + u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset; + std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size()); } } // namespace Tegra diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 982f309c5..0db1cd646 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -68,8 +68,8 @@ struct ChCommand { std::vector<u32> arguments; }; -using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>; -using ChCommandList = std::vector<Tegra::ChCommand>; +using ChCommandHeaderList = std::vector<ChCommandHeader>; +using ChCommandList = std::vector<ChCommand>; struct ThiRegisters { u32_le increment_syncpt{}; @@ -96,7 +96,7 @@ enum class ThiMethod : u32 { class CDmaPusher { public: - explicit CDmaPusher(GPU& gpu); + explicit CDmaPusher(GPU& gpu_); ~CDmaPusher(); /// Push NVDEC command buffer entries into queue @@ -109,17 +109,17 @@ public: void Step(); /// Invoke command class devices to execute the command based on the current state - void ExecuteCommand(u32 offset, u32 data); + void ExecuteCommand(u32 state_offset, u32 data); private: /// Write arguments value to the ThiRegisters member at the specified offset - void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments); + void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); GPU& gpu; - std::shared_ptr<Tegra::Nvdec> nvdec_processor; - std::unique_ptr<Tegra::Vic> vic_processor; - std::unique_ptr<Tegra::Host1x> host1x_processor; + std::shared_ptr<Nvdec> nvdec_processor; + std::unique_ptr<Vic> vic_processor; + std::unique_ptr<Host1x> 
host1x_processor; std::unique_ptr<SyncptIncrManager> nvdec_sync; std::unique_ptr<SyncptIncrManager> vic_sync; ChClassId current_class{}; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f547f5bd4..39bc923a5 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -44,7 +44,7 @@ Codec::~Codec() { } void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); current_codec = codec; } @@ -62,7 +62,7 @@ void Codec::Decode() { } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec)); + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); return; } diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 7d8d6ee3c..59e586695 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -233,7 +233,7 @@ constexpr std::array<s32, 254> map_lut{ } } // Anonymous namespace -VP9::VP9(GPU& gpu) : gpu(gpu) {} +VP9::VP9(GPU& gpu_) : gpu{gpu_} {} VP9::~VP9() = default; @@ -374,43 +374,43 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { } Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { - Vp9FrameContainer frame{}; + Vp9FrameContainer current_frame{}; { gpu.SyncGuestHost(); - frame.info = GetVp9PictureInfo(state); - frame.bit_stream.resize(frame.info.bitstream_size); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), - frame.info.bitstream_size); + current_frame.info = GetVp9PictureInfo(state); + 
current_frame.bit_stream.resize(current_frame.info.bitstream_size); + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), + current_frame.info.bitstream_size); } // Buffer two frames, saving the last show frame info if (!next_next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ - .info = frame.info, - .bit_stream = std::move(frame.bit_stream), + .info = current_frame.info, + .bit_stream = std::move(current_frame.bit_stream), }; - next_next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_next_frame.info; - frame.bit_stream = std::move(next_next_frame.bit_stream); + next_next_frame.info.show_frame = current_frame.info.last_frame_shown; + current_frame.info = next_next_frame.info; + current_frame.bit_stream = std::move(next_next_frame.bit_stream); next_next_frame = std::move(temp); if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp2{ - .info = frame.info, - .bit_stream = std::move(frame.bit_stream), + .info = current_frame.info, + .bit_stream = std::move(current_frame.bit_stream), }; - next_frame.info.show_frame = frame.info.last_frame_shown; - frame.info = next_frame.info; - frame.bit_stream = std::move(next_frame.bit_stream); + next_frame.info.show_frame = current_frame.info.last_frame_shown; + current_frame.info = next_frame.info; + current_frame.bit_stream = std::move(next_frame.bit_stream); next_frame = std::move(temp2); } else { - next_frame.info = frame.info; - next_frame.bit_stream = std::move(frame.bit_stream); + next_frame.info = current_frame.info; + next_frame.bit_stream = std::move(current_frame.bit_stream); } } else { - next_next_frame.info = frame.info; - next_next_frame.bit_stream = std::move(frame.bit_stream); + next_next_frame.info = current_frame.info; + next_next_frame.bit_stream = std::move(current_frame.bit_stream); } - return frame; + return current_frame; } std::vector<u8> VP9::ComposeCompressedHeader() { diff --git a/src/video_core/command_classes/codecs/vp9.h 
b/src/video_core/command_classes/codecs/vp9.h index 9ebbbf59e..8396c8105 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -108,7 +108,7 @@ private: class VP9 { public: - explicit VP9(GPU& gpu); + explicit VP9(GPU& gpu_); ~VP9(); VP9(const VP9&) = delete; diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 6cfc193fa..aa8c9f9de 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -9,7 +9,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/decoders.h" extern "C" { #include <libswscale/swscale.h> @@ -27,7 +27,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) { } void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { - LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method); VicStateWrite(static_cast<u32>(method), arguments[0]); const u64 arg = static_cast<u64>(arguments[0]) << 8; switch (method) { @@ -105,9 +105,9 @@ void Vic::Execute() { const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, block_height, 0); std::vector<u8> swizzled_data(size); - Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, - swizzled_data.data(), converted_frame_buffer.get(), - false, block_height, 0, 1); + Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, + frame->width, 4, swizzled_data.data(), + converted_frame_buffer.get(), block_height, 0, 0); gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); gpu.Maxwell3D().OnMemoryWrite(); diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index b06c32c84..1619d8664 100644 --- 
a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. #include <array> -#include <bitset> #include <cstddef> +#include "common/common_types.h" #include "video_core/compatible_formats.h" #include "video_core/surface.h" @@ -13,23 +13,25 @@ namespace VideoCore::Surface { namespace { +using Table = std::array<std::array<u64, 2>, MaxPixelFormat>; + // Compatibility table taken from Table 3.X.2 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt -constexpr std::array VIEW_CLASS_128_BITS = { +constexpr std::array VIEW_CLASS_128_BITS{ PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_SINT, }; -constexpr std::array VIEW_CLASS_96_BITS = { +constexpr std::array VIEW_CLASS_96_BITS{ PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, -constexpr std::array VIEW_CLASS_64_BITS = { +constexpr std::array VIEW_CLASS_64_BITS{ PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, @@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = { // TODO: How should we handle 48 bits? -constexpr std::array VIEW_CLASS_32_BITS = { +constexpr std::array VIEW_CLASS_32_BITS{ PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, @@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = { // TODO: How should we handle 24 bits? 
-constexpr std::array VIEW_CLASS_16_BITS = { +constexpr std::array VIEW_CLASS_16_BITS{ PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -constexpr std::array VIEW_CLASS_8_BITS = { +constexpr std::array VIEW_CLASS_8_BITS{ PixelFormat::R8_UINT, PixelFormat::R8_UNORM, PixelFormat::R8_SINT, PixelFormat::R8_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC1_RED = { +constexpr std::array VIEW_CLASS_RGTC1_RED{ PixelFormat::BC4_UNORM, PixelFormat::BC4_SNORM, }; -constexpr std::array VIEW_CLASS_RGTC2_RG = { +constexpr std::array VIEW_CLASS_RGTC2_RG{ PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, }; -constexpr std::array VIEW_CLASS_BPTC_UNORM = { +constexpr std::array VIEW_CLASS_BPTC_UNORM{ PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, }; -constexpr std::array VIEW_CLASS_BPTC_FLOAT = { +constexpr std::array VIEW_CLASS_BPTC_FLOAT{ PixelFormat::BC6H_SFLOAT, PixelFormat::BC6H_UFLOAT, }; +constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{ + PixelFormat::ASTC_2D_4X4_UNORM, + PixelFormat::ASTC_2D_4X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{ + PixelFormat::ASTC_2D_5X4_UNORM, + PixelFormat::ASTC_2D_5X4_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{ + PixelFormat::ASTC_2D_5X5_UNORM, + PixelFormat::ASTC_2D_5X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{ + PixelFormat::ASTC_2D_6X5_UNORM, + PixelFormat::ASTC_2D_6X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{ + PixelFormat::ASTC_2D_6X6_UNORM, + PixelFormat::ASTC_2D_6X6_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{ + PixelFormat::ASTC_2D_8X5_UNORM, + PixelFormat::ASTC_2D_8X5_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ + PixelFormat::ASTC_2D_8X8_UNORM, + PixelFormat::ASTC_2D_8X8_SRGB, +}; + +// Missing formats: +// PixelFormat::ASTC_2D_10X5_UNORM +// PixelFormat::ASTC_2D_10X5_SRGB + 
+// Missing formats: +// PixelFormat::ASTC_2D_10X6_UNORM +// PixelFormat::ASTC_2D_10X6_SRGB + +constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ + PixelFormat::ASTC_2D_10X8_UNORM, + PixelFormat::ASTC_2D_10X8_SRGB, +}; + +constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ + PixelFormat::ASTC_2D_10X10_UNORM, + PixelFormat::ASTC_2D_10X10_SRGB, +}; + +// Missing formats +// ASTC_2D_12X10_UNORM, +// ASTC_2D_12X10_SRGB, + +constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ + PixelFormat::ASTC_2D_12X12_UNORM, + PixelFormat::ASTC_2D_12X12_SRGB, +}; + // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt -constexpr std::array COPY_CLASS_128_BITS = { +constexpr std::array COPY_CLASS_128_BITS{ PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, @@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = { // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 -constexpr std::array COPY_CLASS_64_BITS = { +constexpr std::array COPY_CLASS_64_BITS{ PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, @@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = { // COMPRESSED_RGBA_S3TC_DXT1_EXT // COMPRESSED_SIGNED_RED_RGTC1 -void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { - compatiblity[format_a][format_b] = true; - compatiblity[format_b][format_a] = true; +constexpr void Enable(Table& table, size_t format_a, size_t format_b) { + table[format_a][format_b / 64] |= u64(1) << (format_b % 64); + table[format_b][format_a / 64] |= u64(1) << (format_a % 64); } -void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { - 
Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); +constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) { + Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); } template <typename Range> -void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { +constexpr void EnableRange(Table& table, const Range& range) { for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { for (auto it_b = it_a; it_b != range.end(); ++it_b) { - Enable(compatibility, *it_a, *it_b); + Enable(table, *it_a, *it_b); } } } -} // Anonymous namespace +constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) { + const size_t a = static_cast<size_t>(format_a); + const size_t b = static_cast<size_t>(format_b); + return ((table[a][b / 64] >> (b % 64)) & 1) != 0; +} -FormatCompatibility::FormatCompatibility() { +constexpr Table MakeViewTable() { + Table view{}; for (size_t i = 0; i < MaxPixelFormat; ++i) { // Identity is allowed Enable(view, i, i); } - EnableRange(view, VIEW_CLASS_128_BITS); EnableRange(view, VIEW_CLASS_96_BITS); EnableRange(view, VIEW_CLASS_64_BITS); @@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() { EnableRange(view, VIEW_CLASS_RGTC2_RG); EnableRange(view, VIEW_CLASS_BPTC_UNORM); EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); + EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); + return view; +} - copy = view; +constexpr Table MakeCopyTable() { + Table copy = MakeViewTable(); EnableRange(copy, 
COPY_CLASS_128_BITS); EnableRange(copy, COPY_CLASS_64_BITS); + return copy; +} + +} // Anonymous namespace + +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeViewTable(); + return IsSupported(TABLE, format_a, format_b); +} + +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) { + static constexpr Table TABLE = MakeCopyTable(); + return IsSupported(TABLE, format_a, format_b); } } // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h index 51766349b..b5eb03bea 100644 --- a/src/video_core/compatible_formats.h +++ b/src/video_core/compatible_formats.h @@ -4,31 +4,12 @@ #pragma once -#include <array> -#include <bitset> -#include <cstddef> - #include "video_core/surface.h" namespace VideoCore::Surface { -class FormatCompatibility { -public: - using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; - - explicit FormatCompatibility(); - - bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { - return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; - } - - bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { - return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; - } +bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b); -private: - Table view; - Table copy; -}; +bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b); } // namespace VideoCore::Surface diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h new file mode 100644 index 000000000..4f1d29c04 --- /dev/null +++ b/src/video_core/delayed_destruction_ring.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstddef> +#include <utility> +#include <vector> + +namespace VideoCommon { + +/// Container to push objects to be destroyed a few ticks in the future +template <typename T, size_t TICKS_TO_DESTROY> +class DelayedDestructionRing { +public: + void Tick() { + index = (index + 1) % TICKS_TO_DESTROY; + elements[index].clear(); + } + + void Push(T&& object) { + elements[index].push_back(std::move(object)); + } + +private: + size_t index = 0; + std::array<std::vector<T>, TICKS_TO_DESTROY> elements; +}; + +} // namespace VideoCommon diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index e16075993..b1eaac00c 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -9,13 +9,16 @@ #include "video_core/dirty_flags.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace VideoCommon::Dirty { using Tegra::Engines::Maxwell3D; void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); + FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); + static constexpr std::size_t num_per_rt = NUM(rt[0]); static constexpr std::size_t begin = OFF(rt); static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; @@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); } FillBlock(tables[1], begin, num, RenderTargets); + FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets); + + tables[0][OFF(rt_control)] = RenderTargets; + tables[1][OFF(rt_control)] = RenderTargetControl; static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; for 
(std::size_t i = 0; i < std::size(zeta_flags); ++i) { diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 3f6c1d83a..875527ddd 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -16,7 +16,10 @@ namespace VideoCommon::Dirty { enum : u8 { NullEntry = 0, + Descriptors, + RenderTargets, + RenderTargetControl, ColorBuffer0, ColorBuffer1, ColorBuffer2, diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index d8801b1f5..2c8b20024 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -13,7 +13,7 @@ namespace Tegra { -DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} +DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {} DmaPusher::~DmaPusher() = default; @@ -152,7 +152,12 @@ void DmaPusher::SetState(const CommandHeader& command_header) { void DmaPusher::CallMethod(u32 argument) const { if (dma_state.method < non_puller_methods) { - gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); + gpu.CallMethod(GPU::MethodCall{ + dma_state.method, + argument, + dma_state.subchannel, + dma_state.method_count, + }); } else { subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, dma_state.is_last_call); diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 96ac267f7..19f286fa7 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -87,11 +87,11 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) - : prefetch_command_list{std::move(prefetch_command_list)} {} + explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) + : 
prefetch_command_list{std::move(prefetch_command_list_)} {} - std::vector<Tegra::CommandListHeader> command_lists; - std::vector<Tegra::CommandHeader> prefetch_command_list; + std::vector<CommandListHeader> command_lists; + std::vector<CommandHeader> prefetch_command_list; }; /** @@ -103,7 +103,7 @@ struct CommandList final { */ class DmaPusher final { public: - explicit DmaPusher(Core::System& system, GPU& gpu); + explicit DmaPusher(Core::System& system_, GPU& gpu_); ~DmaPusher(); void Push(CommandList&& entries) { @@ -112,7 +112,7 @@ public: void DispatchCalls(); - void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { subchannels[subchannel_id] = engine; } @@ -145,7 +145,7 @@ private: bool ib_enable{true}; ///< IB mode enabled - std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; + std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; GPU& gpu; Core::System& system; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index d44ad0cd8..71d7e1473 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -11,16 +11,16 @@ namespace Tegra::Engines::Upload { -State::State(MemoryManager& memory_manager, Registers& regs) - : regs{regs}, memory_manager{memory_manager} {} +State::State(MemoryManager& memory_manager_, Registers& regs_) + : regs{regs_}, memory_manager{memory_manager_} {} State::~State() = default; -void State::ProcessExec(const bool is_linear) { +void State::ProcessExec(const bool is_linear_) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; inner_buffer.resize(copy_size); - this->is_linear = is_linear; + is_linear = is_linear_; } void State::ProcessData(const u32 data, const bool is_last_call) { diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 
462da419e..1c7f1effa 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -54,10 +54,10 @@ struct Registers { class State { public: - State(MemoryManager& memory_manager, Registers& regs); + explicit State(MemoryManager& memory_manager_, Registers& regs_); ~State(); - void ProcessExec(bool is_linear); + void ProcessExec(bool is_linear_); void ProcessData(u32 data, bool is_last_call); private: diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 9409c4075..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,11 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D() = default; +Fermi2D::Fermi2D() { + // Nvidia's OpenGL driver seems to assume these values + regs.src.depth = 1; + regs.dst.depth = 1; +} Fermi2D::~Fermi2D() = default; @@ -21,79 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); - regs.reg_array[method] = method_argument; - switch (method) { - // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, - // so trigger on the second 32-bit write. 
- case FERMI2D_REG_INDEX(blit_src_y) + 1: { - HandleSurfaceCopy(); - break; - } + if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { + Blit(); } } void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); + for (u32 i = 0; i < amount; ++i) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } } -static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { - const u32 line_a = src_2 - src_1; - const u32 line_b = dst_2 - dst_1; - const u32 excess = std::max<s32>(0, line_a - src_line + src_1); - return {line_b - (excess * line_b) / line_a, excess}; -} - -void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", - static_cast<u32>(regs.operation)); +void Fermi2D::Blit() { + LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", + regs.src.Address(), regs.dst.Address()); - // TODO(Subv): Only raw copies are implemented. 
- ASSERT(regs.operation == Operation::SrcCopy); + UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); + UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); + UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); - const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; - const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; - u32 src_blit_x2, src_blit_y2; - if (regs.blit_control.origin == Origin::Corner) { - src_blit_x2 = - static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); - src_blit_y2 = - static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); - } else { - src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); - src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); - } - u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; - u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; - const auto [new_dst_w, src_excess_x] = - DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); - const auto [new_dst_h, src_excess_y] = - DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); - dst_blit_x2 = new_dst_w + regs.blit_dst_x; - src_blit_x2 = src_blit_x2 - src_excess_x; - dst_blit_y2 = new_dst_h + regs.blit_dst_y; - src_blit_y2 = src_blit_y2 - src_excess_y; - const auto [new_src_w, dst_excess_x] = - DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); - const auto [new_src_h, dst_excess_y] = - DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); - src_blit_x2 = new_src_w + src_blit_x1; - dst_blit_x2 = dst_blit_x2 - dst_excess_x; - src_blit_y2 = new_src_h + src_blit_y1; - dst_blit_y2 = dst_blit_y2 - 
dst_excess_y; - const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, - dst_blit_y2}; - const Config copy_config{ + const auto& args = regs.pixels_from_memory; + const Config config{ .operation = regs.operation, - .filter = regs.blit_control.filter, - .src_rect = src_rect, - .dst_rect = dst_rect, + .filter = args.sample_mode.filter, + .dst_x0 = args.dst_x0, + .dst_y0 = args.dst_y0, + .dst_x1 = args.dst_x0 + args.dst_width, + .dst_y1 = args.dst_y0 + args.dst_height, + .src_x0 = static_cast<s32>(args.src_x0 >> 32), + .src_y0 = static_cast<s32>(args.src_y0 >> 32), + .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), + .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -53,8 +53,8 @@ public: }; enum class Filter : u32 { - PointSample = 0, // Nearest - Linear = 1, + Point = 0, + Bilinear = 1, }; enum class Operation : u32 { @@ -67,88 +67,235 @@ public: BlendPremult = 6, }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x258; + enum class MemoryLayout : u32 { + BlockLinear = 0, + Pitch = 1, + }; - struct Surface { - RenderTargetFormat format; - BitField<0, 1, u32> linear; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 depth; - u32 layer; - u32 pitch; - u32 width; - u32 height; - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); 
- } - - u32 BlockWidth() const { - return block_width.Value(); - } - - u32 BlockHeight() const { - return block_height.Value(); - } - - u32 BlockDepth() const { - return block_depth.Value(); - } - }; - static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + enum class CpuIndexWrap : u32 { + Wrap = 0, + NoWrap = 1, + }; + struct Surface { + RenderTargetFormat format; + MemoryLayout linear; union { - struct { - INSERT_UNION_PADDING_WORDS(0x80); + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 addr_upper; + u32 addr_lower; + + [[nodiscard]] constexpr GPUVAddr Address() const noexcept { + return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); - Surface dst; + enum class SectorPromotion : u32 { + NoPromotion = 0, + PromoteTo2V = 1, + PromoteTo2H = 2, + PromoteTo4 = 3, + }; + + enum class NumTpcs : u32 { + All = 0, + One = 1, + }; - INSERT_UNION_PADDING_WORDS(2); + enum class RenderEnableMode : u32 { + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, + }; - Surface src; + enum class ColorKeyFormat : u32 { + A16R56G6B5 = 0, + A1R5G55B5 = 1, + A8R8G8B8 = 2, + A2R10G10B10 = 3, + Y8 = 4, + Y16 = 5, + Y32 = 6, + }; - INSERT_UNION_PADDING_WORDS(0x15); + union Beta4 { + BitField<0, 8, u32> b; + BitField<8, 8, u32> g; + BitField<16, 8, u32> r; + BitField<24, 8, u32> a; + }; - Operation operation; + struct Point { + u32 x; + u32 y; + }; - INSERT_UNION_PADDING_WORDS(0x177); + enum class PatternSelect : u32 { + MonoChrome8x8 = 0, + MonoChrome64x1 = 1, + MonoChrome1x64 = 2, + Color = 3, + }; + enum class NotifyType : u32 { + WriteOnly = 0, + WriteThenAwaken = 1, + }; + + enum class MonochromePatternColorFormat : u32 { + A8X8R8G6B5 = 0, + A1R5G5B5 = 1, + A8R8G8B8 = 2, + A8Y8 = 3, + A8X8Y16 
= 4, + Y32 = 5, + }; + + enum class MonochromePatternFormat : u32 { + CGA6_M1 = 0, + LE_M1 = 1, + }; + + union Regs { + static constexpr std::size_t NUM_REGS = 0x258; + struct { + u32 object; + INSERT_UNION_PADDING_WORDS(0x3F); + u32 no_operation; + NotifyType notify; + INSERT_UNION_PADDING_WORDS(0x2); + u32 wait_for_idle; + INSERT_UNION_PADDING_WORDS(0xB); + u32 pm_trigger; + INSERT_UNION_PADDING_WORDS(0xF); + u32 context_dma_notify; + u32 dst_context_dma; + u32 src_context_dma; + u32 semaphore_context_dma; + INSERT_UNION_PADDING_WORDS(0x1C); + Surface dst; + CpuIndexWrap pixels_from_cpu_index_wrap; + u32 kind2d_check_enable; + Surface src; + SectorPromotion pixels_from_memory_sector_promotion; + INSERT_UNION_PADDING_WORDS(0x1); + NumTpcs num_tpcs; + u32 render_enable_addr_upper; + u32 render_enable_addr_lower; + RenderEnableMode render_enable_mode; + INSERT_UNION_PADDING_WORDS(0x4); + u32 clip_x0; + u32 clip_y0; + u32 clip_width; + u32 clip_height; + BitField<0, 1, u32> clip_enable; + BitField<0, 3, ColorKeyFormat> color_key_format; + u32 color_key; + BitField<0, 1, u32> color_key_enable; + BitField<0, 8, u32> rop; + u32 beta1; + Beta4 beta4; + Operation operation; + union { + BitField<0, 6, u32> x; + BitField<8, 6, u32> y; + } pattern_offset; + BitField<0, 2, PatternSelect> pattern_select; + INSERT_UNION_PADDING_WORDS(0xC); + struct { + BitField<0, 3, MonochromePatternColorFormat> color_format; + BitField<0, 1, MonochromePatternFormat> format; + u32 color0; + u32 color1; + u32 pattern0; + u32 pattern1; + } monochrome_pattern; + struct { + std::array<u32, 0x40> X8R8G8B8; + std::array<u32, 0x20> R5G6B5; + std::array<u32, 0x20> X1R5G5B5; + std::array<u32, 0x10> Y8; + } color_pattern; + INSERT_UNION_PADDING_WORDS(0x10); + struct { + u32 prim_mode; + u32 prim_color_format; + u32 prim_color; + u32 line_tie_break_bits; + INSERT_UNION_PADDING_WORDS(0x14); + u32 prim_point_xy; + INSERT_UNION_PADDING_WORDS(0x7); + std::array<Point, 0x40> prim_point; + } render_solid; + 
struct { + u32 data_type; + u32 color_format; + u32 index_format; + u32 mono_format; + u32 wrap; + u32 color0; + u32 color1; + u32 mono_opacity; + INSERT_UNION_PADDING_WORDS(0x6); + u32 src_width; + u32 src_height; + u32 dx_du_frac; + u32 dx_du_int; + u32 dx_dv_frac; + u32 dy_dv_int; + u32 dst_x0_frac; + u32 dst_x0_int; + u32 dst_y0_frac; + u32 dst_y0_int; + u32 data; + } pixels_from_cpu; + INSERT_UNION_PADDING_WORDS(0x3); + u32 big_endian_control; + INSERT_UNION_PADDING_WORDS(0x3); + struct { + BitField<0, 3, u32> block_shape; + BitField<0, 5, u32> corral_size; + BitField<0, 1, u32> safe_overlap; union { - u32 raw; BitField<0, 1, Origin> origin; BitField<4, 1, Filter> filter; - } blit_control; - + } sample_mode; INSERT_UNION_PADDING_WORDS(0x8); - - u32 blit_dst_x; - u32 blit_dst_y; - u32 blit_dst_width; - u32 blit_dst_height; - u64 blit_du_dx; - u64 blit_dv_dy; - u64 blit_src_x; - u64 blit_src_y; - - INSERT_UNION_PADDING_WORDS(0x21); - }; - std::array<u32, NUM_REGS> reg_array; + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 du_dx; + s64 dv_dy; + s64 src_x0; + s64 src_y0; + } pixels_from_memory; }; + std::array<u32, NUM_REGS> reg_array; } regs{}; struct Config { - Operation operation{}; - Filter filter{}; - Common::Rectangle<u32> src_rect; - Common::Rectangle<u32> dst_rect; + Operation operation; + Filter filter; + s32 dst_x0; + s32 dst_y0; + s32 dst_x1; + s32 dst_y1; + s32 src_x0; + s32 src_y0; + s32 src_x1; + s32 src_y1; }; private: @@ -156,25 +303,49 @@ private: /// Performs the copy from the source surface to the destination surface as configured in the /// registers. 
- void HandleSurfaceCopy(); + void Blit(); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(dst, 0x80); -ASSERT_REG_POSITION(src, 0x8C); -ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(blit_control, 0x223); -ASSERT_REG_POSITION(blit_dst_x, 0x22c); -ASSERT_REG_POSITION(blit_dst_y, 0x22d); -ASSERT_REG_POSITION(blit_dst_width, 0x22e); -ASSERT_REG_POSITION(blit_dst_height, 0x22f); -ASSERT_REG_POSITION(blit_du_dx, 0x230); -ASSERT_REG_POSITION(blit_dv_dy, 0x232); -ASSERT_REG_POSITION(blit_src_x, 0x234); -ASSERT_REG_POSITION(blit_src_y, 0x236); +ASSERT_REG_POSITION(object, 0x0); +ASSERT_REG_POSITION(no_operation, 0x100); +ASSERT_REG_POSITION(notify, 0x104); +ASSERT_REG_POSITION(wait_for_idle, 0x110); +ASSERT_REG_POSITION(pm_trigger, 0x140); +ASSERT_REG_POSITION(context_dma_notify, 0x180); +ASSERT_REG_POSITION(dst_context_dma, 0x184); +ASSERT_REG_POSITION(src_context_dma, 0x188); +ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); +ASSERT_REG_POSITION(dst, 0x200); +ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); +ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); +ASSERT_REG_POSITION(src, 0x230); +ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); +ASSERT_REG_POSITION(num_tpcs, 0x260); +ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); +ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); +ASSERT_REG_POSITION(clip_x0, 0x280); +ASSERT_REG_POSITION(clip_y0, 0x284); +ASSERT_REG_POSITION(clip_width, 0x288); +ASSERT_REG_POSITION(clip_height, 0x28c); +ASSERT_REG_POSITION(clip_enable, 0x290); +ASSERT_REG_POSITION(color_key_format, 0x294); +ASSERT_REG_POSITION(color_key, 0x298); +ASSERT_REG_POSITION(rop, 0x2A0); +ASSERT_REG_POSITION(beta1, 0x2A4); +ASSERT_REG_POSITION(beta4, 0x2A8); +ASSERT_REG_POSITION(operation, 0x2AC); 
+ASSERT_REG_POSITION(pattern_offset, 0x2B0); +ASSERT_REG_POSITION(pattern_select, 0x2B4); +ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); +ASSERT_REG_POSITION(color_pattern, 0x300); +ASSERT_REG_POSITION(render_solid, 0x580); +ASSERT_REG_POSITION(pixels_from_cpu, 0x800); +ASSERT_REG_POSITION(big_endian_control, 0x870); +ASSERT_REG_POSITION(pixels_from_memory, 0x880); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { - const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); - ASSERT(cbuf_mask[regs.tex_cb_index]); - - const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; - ASSERT(texinfo.Address() != 0); - - const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); - ASSERT(address < texinfo.Address() + texinfo.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; - return GetTextureInfo(tex_handle); -} - -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = 
GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -209,11 +209,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - Texture::FullTextureInfo GetTexture(std::size_t offset) const; - - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index dc71b2eec..9911140e9 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,8 +14,8 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) - : system{system}, upload_state{memory_manager, regs.upload} {} +KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) + : system{system_}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5b7f71a00..62483589e 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h 
@@ -35,7 +35,7 @@ namespace Tegra::Engines { class KeplerMemory final : public EngineInterface { public: - KeplerMemory(Core::System& system, MemoryManager& memory_manager); + explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6287df633..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cinttypes> #include <cstring> #include <optional> #include "common/assert.h" @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume OnMemoryWrite(); } return; + case MAXWELL3D_REG_INDEX(fragment_barrier): + return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return rasterizer->TiledCacheBarrier(); } } @@ -359,7 +362,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -504,8 +507,7 @@ void Maxwell3D::ProcessCounterReset() { rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: - LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", - static_cast<int>(regs.counter_reset)); + LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset); break; } } @@ -520,7 +522,7 @@ void Maxwell3D::ProcessSyncPoint() { } void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", 
static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); @@ -558,12 +560,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed, system.GPU().GetTicks()); return std::nullopt; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); + regs.query.query_get.select.Value()); return 1; } } @@ -640,7 +642,7 @@ void Maxwell3D::FinishCBData() { } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { - const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -649,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { - const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - -Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { - const auto stage_index = 
static_cast<std::size_t>(stage); - const auto& shader = state.shader_stages[stage_index]; - const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - - return GetTextureInfo(tex_handle); -} - u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; } void Maxwell3D::ProcessClearBuffers() { - ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && - regs.clear_buffers.R == regs.clear_buffers.B && - regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer->Clear(); } @@ -693,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); - return result; + return memory_manager.Read<u32>(buffer.address + offset); } SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { @@ -713,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = 
GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b0d9559d0..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -438,16 +438,6 @@ public: DecrWrapOGL = 0x8508, }; - enum class MemoryLayout : u32 { - Linear = 0, - BlockLinear = 1, - }; - - enum class InvMemoryLayout : u32 { - BlockLinear = 0, - Linear = 1, - }; - enum class CounterReset : u32 { SampleCnt = 0x01, Unk02 = 0x02, @@ -589,21 +579,31 @@ public: NegativeW = 7, }; + enum class SamplerIndex : u32 { + Independently = 0, + ViaHeaderIndex = 1, + }; + + struct TileMode { + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + BitField<12, 1, u32> is_pitch_linear; + BitField<16, 1, u32> is_3d; + }; + }; + static_assert(sizeof(TileMode) == 4); + struct RenderTargetConfig { u32 address_high; u32 address_low; u32 width; u32 height; Tegra::RenderTargetFormat format; + TileMode tile_mode; union { - BitField<0, 3, u32> block_width; - BitField<4, 3, u32> block_height; - BitField<8, 3, u32> block_depth; - BitField<12, 1, InvMemoryLayout> type; - BitField<16, 1, u32> is_3d; - } memory_layout; - union { - BitField<0, 16, u32> layers; + BitField<0, 16, u32> depth; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -832,7 +832,11 @@ public: u32 patch_vertices; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x4); + + u32 fragment_barrier; + + INSERT_UNION_PADDING_WORDS(0x7); std::array<ScissorTest, NumViewports> scissor_test; @@ -842,7 +846,15 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x5); + + u32 invalidate_texture_data_cache; + + INSERT_UNION_PADDING_WORDS(0x1); + + u32 tiled_cache_barrier; + + 
INSERT_UNION_PADDING_WORDS(0x4); u32 color_mask_common; @@ -866,12 +878,7 @@ public: u32 address_high; u32 address_low; Tegra::DepthFormat format; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - BitField<20, 1, InvMemoryLayout> type; - } memory_layout; + TileMode tile_mode; u32 layer_stride; GPUVAddr Address() const { @@ -880,7 +887,18 @@ public: } } zeta; - INSERT_UNION_PADDING_WORDS(0x41); + struct { + union { + BitField<0, 16, u32> x; + BitField<16, 16, u32> width; + }; + union { + BitField<0, 16, u32> y; + BitField<16, 16, u32> height; + }; + } render_area; + + INSERT_UNION_PADDING_WORDS(0x3F); union { BitField<0, 4, u32> stencil; @@ -921,7 +939,7 @@ public: BitField<25, 3, u32> map_7; }; - u32 GetMap(std::size_t index) const { + u32 Map(std::size_t index) const { const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, map_4, map_5, map_6, map_7}; ASSERT(index < maps.size()); @@ -934,11 +952,13 @@ public: u32 zeta_width; u32 zeta_height; union { - BitField<0, 16, u32> zeta_layers; + BitField<0, 16, u32> zeta_depth; BitField<16, 1, u32> zeta_volume; }; - INSERT_UNION_PADDING_WORDS(0x26); + SamplerIndex sampler_index; + + INSERT_UNION_PADDING_WORDS(0x25); u32 depth_test_enable; @@ -964,6 +984,7 @@ public: float b; float a; } blend_color; + INSERT_UNION_PADDING_WORDS(0x4); struct { @@ -1001,7 +1022,12 @@ public: float line_width_smooth; float line_width_aliased; - INSERT_UNION_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1B); + + u32 invalidate_sampler_cache_no_wfi; + u32 invalidate_texture_header_cache_no_wfi; + + INSERT_UNION_PADDING_WORDS(0x2); u32 vb_element_base; u32 vb_base_instance; @@ -1045,13 +1071,13 @@ public: } condition; struct { - u32 tsc_address_high; - u32 tsc_address_low; - u32 tsc_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TSCAddress() const { - return static_cast<GPUVAddr>( - (static_cast<GPUVAddr>(tsc_address_high) << 32) | 
tsc_address_low); + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); } } tsc; @@ -1062,13 +1088,13 @@ public: u32 line_smooth_enable; struct { - u32 tic_address_high; - u32 tic_address_low; - u32 tic_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TICAddress() const { - return static_cast<GPUVAddr>( - (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); } } tic; @@ -1397,12 +1423,6 @@ public: void FlushMMEInlineDraw(); - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - - /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; @@ -1473,39 +1493,6 @@ private: void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); - Core::System& system; - MemoryManager& memory_manager; - - VideoCore::RasterizerInterface* rasterizer = nullptr; - - /// Start offsets of each macro in macro_memory - std::array<u32, 0x80> macro_positions = {}; - - std::array<bool, Regs::NUM_REGS> mme_inline{}; - - /// Macro method that is currently being executed / being fed parameters. - u32 executing_macro = 0; - /// Parameters that have been submitted to the macro call so far. - std::vector<u32> macro_params; - - /// Interpreter for the macro codes uploaded to the GPU. 
- std::unique_ptr<MacroEngine> macro_engine; - - static constexpr u32 null_cb_data = 0xFFFFFFFF; - struct { - std::array<std::array<u32, 0x4000>, 16> buffer; - u32 current{null_cb_data}; - u32 id{null_cb_data}; - u32 start_pos{}; - u32 counter{}; - } cb_data_state; - - Upload::State upload_state; - - bool execute_on{true}; - - std::array<u8, Regs::NUM_REGS> dirty_pointers{}; - /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1514,8 +1501,8 @@ private: /** * Call a macro on this engine. + * * @param method Method to call - * @param num_parameters Number of arguments * @param parameters Arguments to the method call */ void CallMacroMethod(u32 method, const std::vector<u32>& parameters); @@ -1564,6 +1551,38 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional<u64> GetQueryResult(); + + Core::System& system; + MemoryManager& memory_manager; + + VideoCore::RasterizerInterface* rasterizer = nullptr; + + /// Start offsets of each macro in macro_memory + std::array<u32, 0x80> macro_positions{}; + + std::array<bool, Regs::NUM_REGS> mme_inline{}; + + /// Macro method that is currently being executed / being fed parameters. + u32 executing_macro = 0; + /// Parameters that have been submitted to the macro call so far. + std::vector<u32> macro_params; + + /// Interpreter for the macro codes uploaded to the GPU. 
+ std::unique_ptr<MacroEngine> macro_engine; + + static constexpr u32 null_cb_data = 0xFFFFFFFF; + struct CBDataState { + std::array<std::array<u32, 0x4000>, 16> buffer; + u32 current{null_cb_data}; + u32 id{null_cb_data}; + u32 start_pos{}; + u32 counter{}; + }; + CBDataState cb_data_state; + + Upload::State upload_state; + + bool execute_on{true}; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1599,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); +ASSERT_REG_POSITION(fragment_barrier, 0x378); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); +ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(depth_bounds, 0x3E7); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); @@ -1610,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); +ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1618,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); -ASSERT_REG_POSITION(zeta_layers, 0x48c); +ASSERT_REG_POSITION(zeta_depth, 0x48c); +ASSERT_REG_POSITION(sampler_index, 0x48D); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1642,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(line_width_smooth, 0x4EC); ASSERT_REG_POSITION(line_width_aliased, 0x4ED); +ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); +ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 8fa359d0a..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -16,8 +16,10 @@ namespace Tegra::Engines { using namespace Texture; -MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) - : system{system}, memory_manager{memory_manager} {} +MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_} {} + +MaxwellDMA::~MaxwellDMA() = default; void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); @@ -94,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); UNIMPLEMENTED_IF(regs.src_params.layer != 0); @@ -133,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { } void MaxwellDMA::CopyPitchToBlockLinear() { + UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); + const auto& dst_params = regs.dst_params; const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; const u32 width = dst_params.width; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 50f445efc..3c59eeb13 100644 
--- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -72,11 +72,13 @@ public: struct RenderEnable { enum class Mode : u32 { - FALSE = 0, - TRUE = 1, - CONDITIONAL = 2, - RENDER_IF_EQUAL = 3, - RENDER_IF_NOT_EQUAL = 4, + // Note: This uses Pascal case in order to avoid the identifiers + // FALSE and TRUE, which are reserved on Darwin. + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, }; PackedGPUVAddr address; @@ -185,8 +187,8 @@ public: }; static_assert(sizeof(RemapConst) == 12); - explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); - ~MaxwellDMA() = default; + explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); + ~MaxwellDMA(); /// Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 37d17efdc..8b45f1b62 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1437,8 +1437,7 @@ union Instruction { return TextureType::TextureCube; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast<u32>(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } @@ -1533,8 +1532,7 @@ union Instruction { return TextureType::Texture3D; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast<u32>(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index de6991ef6..3512283ff 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "core/core.h" +#include 
"video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -17,11 +18,11 @@ namespace VideoCommon { class FenceBase { public: - FenceBase(u32 payload, bool is_stubbed) - : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} + explicit FenceBase(u32 payload_, bool is_stubbed_) + : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} - FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) - : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} + explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_) + : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} GPUVAddr GetAddress() const { return address; @@ -47,6 +48,11 @@ protected: template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> class FenceManager { public: + /// Notify the fence manager about a new frame + void TickFrame() { + delayed_destruction_ring.Tick(); + } + void SignalSemaphore(GPUVAddr addr, u32 value) { TryReleasePendingFences(); const bool should_flush = ShouldFlush(); @@ -86,7 +92,7 @@ public: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -132,7 +138,7 @@ private: } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } - fences.pop(); + PopFence(); } } @@ -158,7 +164,14 @@ private: query_cache.CommitAsyncFlushes(); } + void PopFence() { + delayed_destruction_ring.Push(std::move(fences.front())); + fences.pop(); + } + std::queue<TFence> fences; + + DelayedDestructionRing<TFence, 6> delayed_destruction_ring; }; } // namespace VideoCommon diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h new file mode 100644 index 000000000..b86c3a757 --- /dev/null +++ b/src/video_core/framebuffer_config.h @@ -0,0 +1,31 @@ +// Copyright 2020 yuzu Emulator 
Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Tegra { + +/** + * Struct describing framebuffer configuration + */ +struct FramebufferConfig { + enum class PixelFormat : u32 { + A8B8G8R8_UNORM = 1, + RGB565_UNORM = 4, + B8G8R8A8_UNORM = 5, + }; + + VAddr address{}; + u32 offset{}; + u32 width{}; + u32 height{}; + u32 stride{}; + PixelFormat pixel_format{}; + + using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; + TransformFlags transform_flags{}; + Common::Rectangle<int> crop_rect; +}; + +} // namespace Tegra diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e91f52938..6ab06775f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -10,6 +10,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/frontend/emu_window.h" +#include "core/hardware_interrupt_manager.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, - shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} + shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, + gpu_thread{system_, is_async_} {} GPU::~GPU() = default; @@ -198,10 +200,6 @@ void GPU::SyncGuestHost() { renderer->Rasterizer().SyncGuestHost(); } -void GPU::OnCommandListEnd() { - renderer->Rasterizer().ReleaseFences(); -} - enum class GpuSemaphoreOperation { AcquireEqual = 0x1, WriteLong = 0x2, @@ -232,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 CallEngineMultiMethod(method, subchannel, base_start, amount, 
methods_pending); } else { for (std::size_t i = 0; i < amount; i++) { - CallPullerMethod( - {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); + CallPullerMethod(MethodCall{ + method, + base_start[i], + subchannel, + methods_pending - static_cast<u32>(i), + }); } } } @@ -295,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { break; } default: - LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", - static_cast<u32>(method)); + LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); break; } } @@ -375,7 +376,7 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); break; default: - UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); + UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); } } @@ -388,8 +389,7 @@ void GPU::ProcessFenceActionMethod() { IncrementSyncPoint(regs.fence_action.syncpoint_id); break; default: - UNIMPLEMENTED_MSG("Unimplemented operation {}", - static_cast<u32>(regs.fence_action.op.Value())); + UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); } } @@ -459,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() { } } +void GPU::Start() { + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + cpu_context->MakeCurrent(); +} + +void GPU::ObtainContext() { + cpu_context->MakeCurrent(); +} + +void GPU::ReleaseContext() { + cpu_context->DoneCurrent(); +} + +void GPU::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clear all intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if 
(!cdma_pusher) { + cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); + } + + // SubmitCommandBuffer would make the nvdec operations async, this is not currently working + // TODO(ameerj): RE proper async nvdec operation + // gpu_thread.SubmitCommandBuffer(std::move(entries)); + + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + +void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + gpu_thread.SwapBuffers(framebuffer); +} + +void GPU::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPU::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { + auto& interrupt_manager = system.InterruptManager(); + interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); +} + +void GPU::WaitIdle() const { + gpu_thread.WaitIdle(); +} + +void GPU::OnCommandListEnd() { + if (is_async) { + // This command only applies to asynchronous GPU mode + gpu_thread.OnCommandListEnd(); + } +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 21410e125..d81e38680 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -15,6 +15,8 @@ #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/cdma_pusher.h" #include "video_core/dma_pusher.h" +#include "video_core/framebuffer_config.h" +#include "video_core/gpu_thread.h" using CacheAddr = std::uintptr_t; [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { @@ -101,28 +103,6 @@ enum class DepthFormat : u32 { struct CommandListHeader; class DebugContext; -/** - * Struct describing framebuffer configuration - */ -struct FramebufferConfig { - enum class PixelFormat : u32 { - A8B8G8R8_UNORM = 1, - RGB565_UNORM = 4, - B8G8R8A8_UNORM = 5, - }; - - VAddr address; - u32 
offset; - u32 width; - u32 height; - u32 stride; - PixelFormat pixel_format; - - using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; - TransformFlags transform_flags; - Common::Rectangle<int> crop_rect; -}; - namespace Engines { class Fermi2D; class Maxwell3D; @@ -141,7 +121,7 @@ enum class EngineID { class MemoryManager; -class GPU { +class GPU final { public: struct MethodCall { u32 method{}; @@ -149,17 +129,17 @@ public: u32 subchannel{}; u32 method_count{}; - MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0) - : method(method), argument(argument), subchannel(subchannel), - method_count(method_count) {} + explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0) + : method(method_), argument(argument_), subchannel(subchannel_), + method_count(method_count_) {} [[nodiscard]] bool IsLastCall() const { return method_count <= 1; } }; - explicit GPU(Core::System& system, bool is_async, bool use_nvdec); - virtual ~GPU(); + explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); + ~GPU(); /// Binds a renderer to the GPU. void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); @@ -176,7 +156,7 @@ public: /// Synchronizes CPU writes with Host GPU memory. void SyncGuestHost(); /// Signal the ending of command list. - virtual void OnCommandListEnd(); + void OnCommandListEnd(); /// Request a host GPU memory flush from the CPU. [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); @@ -240,7 +220,7 @@ public: } // Waits for the GPU to finish working - virtual void WaitIdle() const = 0; + void WaitIdle() const; /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value); @@ -330,34 +310,34 @@ public: /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary /// core timing events. 
- virtual void Start() = 0; + void Start(); /// Obtain the CPU Context - virtual void ObtainContext() = 0; + void ObtainContext(); /// Release the CPU Context - virtual void ReleaseContext() = 0; + void ReleaseContext(); /// Push GPU command entries to be processed - virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; + void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); /// Swap buffers (render frame) - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + void FlushRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + void InvalidateRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + void FlushAndInvalidateRegion(VAddr addr, u64 size); protected: - virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; + void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; private: void ProcessBindMethod(const MethodCall& method_call); @@ -414,8 +394,8 @@ private: std::condition_variable sync_cv; struct FlushRequest { - FlushRequest(u64 fence, VAddr addr, std::size_t size) - : fence{fence}, addr{addr}, size{size} {} + explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) + : fence{fence_}, addr{addr_}, size{size_} {} u64 fence; VAddr addr; std::size_t size; @@ -427,6 +407,9 @@ private: std::mutex flush_request_mutex; const bool is_async; + + 
VideoCommon::GPUThread::ThreadManager gpu_thread; + std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp deleted file mode 100644 index a9baaf7ef..000000000 --- a/src/video_core/gpu_asynch.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "core/core.h" -#include "core/hardware_interrupt_manager.h" -#include "video_core/gpu_asynch.h" -#include "video_core/gpu_thread.h" -#include "video_core/renderer_base.h" - -namespace VideoCommon { - -GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec) - : GPU{system, true, use_nvdec}, gpu_thread{system} {} - -GPUAsynch::~GPUAsynch() = default; - -void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); - cpu_context = renderer->GetRenderWindow().CreateSharedContext(); - cpu_context->MakeCurrent(); -} - -void GPUAsynch::ObtainContext() { - cpu_context->MakeCurrent(); -} - -void GPUAsynch::ReleaseContext() { - cpu_context->DoneCurrent(); -} - -void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { - gpu_thread.SubmitList(std::move(entries)); -} - -void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - if (!use_nvdec) { - return; - } - // This condition fires when a video stream ends, clear all intermediary data - if (entries[0].raw == 0xDEADB33F) { - cdma_pusher.reset(); - return; - } - if (!cdma_pusher) { - cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); - } - - // SubmitCommandBuffer would make the nvdec operations async, this is not currently working - // TODO(ameerj): RE proper async nvdec operation - // gpu_thread.SubmitCommandBuffer(std::move(entries)); - - cdma_pusher->Push(std::move(entries)); - cdma_pusher->DispatchCalls(); -} - -void GPUAsynch::SwapBuffers(const 
Tegra::FramebufferConfig* framebuffer) { - gpu_thread.SwapBuffers(framebuffer); -} - -void GPUAsynch::FlushRegion(VAddr addr, u64 size) { - gpu_thread.FlushRegion(addr, size); -} - -void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { - gpu_thread.InvalidateRegion(addr, size); -} - -void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { - gpu_thread.FlushAndInvalidateRegion(addr, size); -} - -void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { - auto& interrupt_manager = system.InterruptManager(); - interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); -} - -void GPUAsynch::WaitIdle() const { - gpu_thread.WaitIdle(); -} - -void GPUAsynch::OnCommandListEnd() { - gpu_thread.OnCommandListEnd(); -} - -} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h deleted file mode 100644 index 0c0872e73..000000000 --- a/src/video_core/gpu_asynch.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/gpu.h" -#include "video_core/gpu_thread.h" - -namespace Core::Frontend { -class GraphicsContext; -} - -namespace VideoCore { -class RendererBase; -} // namespace VideoCore - -namespace VideoCommon { - -/// Implementation of GPU interface that runs the GPU asynchronously -class GPUAsynch final : public Tegra::GPU { -public: - explicit GPUAsynch(Core::System& system, bool use_nvdec); - ~GPUAsynch() override; - - void Start() override; - void ObtainContext() override; - void ReleaseContext() override; - void PushGPUEntries(Tegra::CommandList&& entries) override; - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - void WaitIdle() const override; - - void OnCommandListEnd() override; - -protected: - void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; - -private: - GPUThread::ThreadManager gpu_thread; - std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; -}; - -} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp deleted file mode 100644 index ecf7bbdf3..000000000 --- a/src/video_core/gpu_synch.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "video_core/gpu_synch.h" -#include "video_core/renderer_base.h" - -namespace VideoCommon { - -GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {} - -GPUSynch::~GPUSynch() = default; - -void GPUSynch::Start() {} - -void GPUSynch::ObtainContext() { - renderer->Context().MakeCurrent(); -} - -void GPUSynch::ReleaseContext() { - renderer->Context().DoneCurrent(); -} - -void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); -} - -void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { - if (!use_nvdec) { - return; - } - // This condition fires when a video stream ends, clears all intermediary data - if (entries[0].raw == 0xDEADB33F) { - cdma_pusher.reset(); - return; - } - if (!cdma_pusher) { - cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); - } - cdma_pusher->Push(std::move(entries)); - cdma_pusher->DispatchCalls(); -} - -void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - renderer->SwapBuffers(framebuffer); -} - -void GPUSynch::FlushRegion(VAddr addr, u64 size) { - renderer->Rasterizer().FlushRegion(addr, size); -} - -void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { - renderer->Rasterizer().InvalidateRegion(addr, size); -} - -void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { - renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); -} - -} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h deleted file mode 100644 index 9d778c71a..000000000 --- a/src/video_core/gpu_synch.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/gpu.h" - -namespace Core::Frontend { -class GraphicsContext; -} - -namespace VideoCore { -class RendererBase; -} // namespace VideoCore - -namespace VideoCommon { - -/// Implementation of GPU interface that runs the GPU synchronously -class GPUSynch final : public Tegra::GPU { -public: - explicit GPUSynch(Core::System& system, bool use_nvdec); - ~GPUSynch() override; - - void Start() override; - void ObtainContext() override; - void ReleaseContext() override; - void PushGPUEntries(Tegra::CommandList&& entries) override; - void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - void WaitIdle() const override {} - -protected: - void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, - [[maybe_unused]] u32 value) const override {} -}; - -} // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4b8f58283..7e490bcc3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); + SCOPE_EXIT({ MicroProfileOnThreadExit(); }); + Common::SetCurrentThreadName(name.c_str()); Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); @@ -39,23 +42,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, CommandDataContainer next; while 
(state.is_running) { next = state.queue.PopWait(); - if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { + if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); - } else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { + } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { // NVDEC cdma_pusher.Push(std::move(command_list->entries)); cdma_pusher.DispatchCalls(); - } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { + } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { renderer.Rasterizer().ReleaseFences(); } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); - } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { - renderer.Rasterizer().FlushRegion(data->addr, data->size); - } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { - renderer.Rasterizer().OnCPUWrite(data->addr, data->size); + } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { + renderer.Rasterizer().FlushRegion(flush->addr, flush->size); + } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { + renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { return; } else { @@ -65,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } } -ThreadManager::ThreadManager(Core::System& system) : system{system} {} +ThreadManager::ThreadManager(Core::System& system_, bool is_async_) + : system{system_}, is_async{is_async_} {} ThreadManager::~ThreadManager() { if (!thread.joinable()) { @@ 
-97,19 +101,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { } void ThreadManager::FlushRegion(VAddr addr, u64 size) { - if (!Settings::IsGPULevelHigh()) { + if (!is_async) { + // Always flush with synchronous GPU mode PushCommand(FlushRegionCommand(addr, size)); return; } - if (!Settings::IsGPULevelExtreme()) { - return; - } - if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { + + // Asynchronous GPU mode + switch (Settings::values.gpu_accuracy.GetValue()) { + case Settings::GPUAccuracy::Normal: + PushCommand(FlushRegionCommand(addr, size)); + break; + case Settings::GPUAccuracy::High: + // TODO(bunnei): Is this right? Preserving existing behavior for now + break; + case Settings::GPUAccuracy::Extreme: { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); PushCommand(GPUTickCommand()); while (fence > gpu.CurrentFlushRequestFence()) { } + break; + } + default: + UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); } } @@ -123,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && + system.IsPoweredOn()) { } } @@ -134,6 +150,12 @@ void ThreadManager::OnCommandListEnd() { u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); + + if (!is_async) { + // In synchronous GPU mode, block the caller until the command has executed + WaitIdle(); + } + return fence; } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 32a34e3a7..2775629e7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,8 +10,9 @@ #include <optional> #include <thread> #include <variant> + #include 
"common/threadsafe_queue.h" -#include "video_core/gpu.h" +#include "video_core/framebuffer_config.h" namespace Tegra { struct FramebufferConfig; @@ -25,6 +26,10 @@ class GraphicsContext; class System; } // namespace Core +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + namespace VideoCommon::GPUThread { /// Command to signal to the GPU thread that processing has ended @@ -32,30 +37,30 @@ struct EndProcessingCommand final {}; /// Command to signal to the GPU thread that a command list is ready for processing struct SubmitListCommand final { - explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} Tegra::CommandList entries; }; /// Command to signal to the GPU thread that a cdma command list is ready for processing struct SubmitChCommandEntries final { - explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries) - : entries{std::move(entries)} {} + explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries_) + : entries{std::move(entries_)} {} Tegra::ChCommandHeaderList entries; }; /// Command to signal to the GPU thread that a swap buffers is pending struct SwapBuffersCommand final { - explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) - : framebuffer{std::move(framebuffer)} {} + explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_) + : framebuffer{std::move(framebuffer_)} {} std::optional<Tegra::FramebufferConfig> framebuffer; }; /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { - explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -63,7 +68,7 @@ struct FlushRegionCommand final { /// Command to signal to the GPU thread to invalidate 
a region struct InvalidateRegionCommand final { - explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -71,8 +76,8 @@ struct InvalidateRegionCommand final { /// Command to signal to the GPU thread to flush and invalidate a region struct FlushAndInvalidateRegionCommand final { - explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) - : addr{addr}, size{size} {} + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) + : addr{addr_}, size{size_} {} VAddr addr; u64 size; @@ -92,8 +97,8 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - CommandDataContainer(CommandData&& data, u64 next_fence) - : data{std::move(data)}, fence{next_fence} {} + explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) + : data{std::move(data_)}, fence{next_fence_} {} CommandData data; u64 fence{}; @@ -112,7 +117,7 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system); + explicit ThreadManager(Core::System& system_, bool is_async_); ~ThreadManager(); /// Creates and starts the GPU thread. 
@@ -146,11 +151,11 @@ private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); -private: SynchState state; Core::System& system; std::thread thread; std::thread::id thread_id; + const bool is_async; }; } // namespace VideoCommon::GPUThread diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index 99450777e..21e569ba1 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -19,8 +19,8 @@ namespace VideoCore { class GuestDriverProfile { public: explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional<u32> texture_handler_size) - : texture_handler_size{texture_handler_size} {} + explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) + : texture_handler_size{texture_handler_size_} {} void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c157724a9..4c7399d5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,8 +1,26 @@ -set(SHADER_SOURCES +set(SHADER_FILES + block_linear_unswizzle_2d.comp + block_linear_unswizzle_3d.comp + convert_depth_to_float.frag + convert_float_to_depth.frag + full_screen_triangle.vert + opengl_copy_bc4.comp opengl_present.frag opengl_present.vert + pitch_unswizzle.comp + vulkan_blit_color_float.frag + vulkan_blit_depth_stencil.frag + vulkan_present.frag + vulkan_present.vert + vulkan_quad_array.comp + vulkan_quad_indexed.comp + vulkan_uint8.comp ) +find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) + +set(GLSL_FLAGS "") + set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) @@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) 
set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) +foreach(FILENAME IN ITEMS ${SHADER_FILES}) string(REPLACE "." "_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) - set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) - add_custom_command( - OUTPUT - ${HEADER_FILE} - COMMAND - ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} - MAIN_DEPENDENCY - ${SOURCE_FILE} - DEPENDS - ${INPUT_FILE} - # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified - ) - set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) + # Skip generating source headers on Vulkan exclusive files + if (NOT ${FILENAME} MATCHES "vulkan.*") + set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${SOURCE_HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) + endif() + # Skip compiling to SPIR-V OpenGL exclusive files + if (NOT ${FILENAME} MATCHES "opengl.*") + string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME) + set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h) + add_custom_command( + OUTPUT + ${SPIRV_HEADER_FILE} + COMMAND + ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) + endif() endforeach() add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_SOURCES} + ${SHADER_FILES} ) diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 
100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint layer_stride; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) 
uniform writeonly uimage2DArray output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += pos.z * layer_stride; + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp 
new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp @@ -0,0 +1,125 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint slice_size; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +UNIFORM(8) uint block_depth; +UNIFORM(9) uint block_depth_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer 
InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; + +layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += (pos.z >> block_depth) * slice_size; + offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + 
imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag new file mode 100644 index 000000000..624c58509 --- /dev/null +++ b/src/video_core/host_shaders/convert_depth_to_float.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_texture; +layout(location = 0) out float output_color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + output_color = texelFetch(depth_texture, coord, 0).r; +} diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag new file mode 100644 index 000000000..d86c795f4 --- /dev/null +++ b/src/video_core/host_shaders/convert_float_to_depth.frag @@ -0,0 +1,13 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + float color = texelFetch(color_texture, coord, 0).r; + gl_FragDepth = color; +} diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..452ad6502 --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +#ifdef VULKAN +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) vec2 tex_scale; +UNIFORM(1) vec2 tex_offset; +END_PUSH_CONSTANTS + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); +} diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp new file mode 100644 index 000000000..7b8e20fbe --- /dev/null +++ b/src/video_core/host_shaders/opengl_copy_bc4.comp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 core +#extension GL_ARB_gpu_shader_int64 : require + +layout (local_size_x = 4, local_size_y = 4) in; + +layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input; +layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output; + +layout(location = 0) uniform uvec3 src_offset; +layout(location = 1) uniform uvec3 dst_offset; + +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt +uint DecompressBlock(uint64_t bits, uvec2 coord) { + const uint code_offset = 16 + 3 * (4 * coord.y + coord.x); + const uint code = uint(bits >> code_offset) & 7; + const uint red0 = uint(bits >> 0) & 0xff; + const uint red1 = uint(bits >> 8) & 0xff; + if (red0 > red1) { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (6 * red0 + 1 * red1) / 7; + case 3: + return (5 * red0 + 2 * red1) / 7; + case 4: + return (4 * red0 + 3 * red1) / 7; + case 5: + return (3 * red0 + 4 * red1) / 7; + case 6: + return (2 * red0 + 5 * red1) / 7; + case 7: + return (1 * red0 + 6 * red1) / 7; + } + } else { + switch (code) { + case 0: + return red0; + case 1: + return red1; + case 2: + return (4 * red0 + 1 * red1) / 5; + case 3: + return (3 * red0 + 2 * red1) / 5; + case 4: + return (2 * red0 + 3 * red1) / 5; + case 5: + return (1 * red0 + 4 * red1) / 5; + case 6: + return 0; + case 7: + return 0xff; + } + } + return 0; +} + +void main() { + uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg; + uint64_t bits = packUint2x32(packed_bits); + uint red = DecompressBlock(bits, gl_LocalInvocationID.xy); + uvec4 color = uvec4(red & 0xff, 0, 0, 0xff); + imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color); +} diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 8a4cb024b..84b818227 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag @@ -1,3 +1,7 
@@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert index 2235d31a4..c3b5adbba 100644 --- a/src/video_core/host_shaders/opengl_present.vert +++ b/src/video_core/host_shaders/opengl_present.vert @@ -1,3 +1,7 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #version 430 core out gl_PerVertex { diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp new file mode 100644 index 000000000..cb48ec170 --- /dev/null +++ b/src/video_core/host_shaders/pitch_unswizzle.comp @@ -0,0 +1,86 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec2 origin; +UNIFORM(1) ivec2 destination; +UNIFORM(2) uint bytes_per_block; +UNIFORM(3) uint pitch; +END_PUSH_CONSTANTS + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block) { +#if HAS_EXTENDED_TYPES + case 1: + return uvec4(u8data[offset], 0, 0, 0); + case 2: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 2: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 4: 
+ return uvec4(u32data[offset / 4], 0, 0, 0); + case 8: + return uvec4(u64data[offset / 8], 0, 0); + case 16: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy + origin; + + uint offset = 0; + offset += pos.x * bytes_per_block; + offset += pos.y * pitch; + + const uvec4 texel = ReadTexel(offset); + const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination; + imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag new file mode 100644 index 000000000..4a6aae410 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag @@ -0,0 +1,14 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +void main() { + color = textureLod(tex, texcoord, 0); +} diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..19bb23a5a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag @@ -0,0 +1,16 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) in vec2 texcoord; + +void main() { + gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; + gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r; +} diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag index a06ecd24a..0979ff3e6 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.frag +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -2,15 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - #version 460 core layout (location = 0) in vec2 frag_tex_coord; diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert index c64d9235a..00b868958 100644 --- a/src/video_core/renderer_vulkan/shaders/blit.vert +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -2,15 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - #version 460 core layout (location = 0) in vec2 vert_position; diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp index 5a5703308..212f4e998 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_array.comp +++ b/src/video_core/host_shaders/vulkan_quad_array.comp @@ -2,15 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - #version 460 core layout (local_size_x = 1024) in; diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp index 5a472ba9b..8655591d0 100644 --- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp +++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp @@ -2,15 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -/* - * Build instructions: - * $ glslangValidator -V quad_indexed.comp -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - #version 460 core layout (local_size_x = 1024) in; diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index a320f3ae0..ad74d7af9 100644 --- a/src/video_core/renderer_vulkan/shaders/uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp @@ -2,15 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-/* - * Build instructions: - * $ glslangValidator -V $THIS_FILE -o output.spv - * $ spirv-opt -O --strip-debug output.spv -o optimized.spv - * $ xxd -i optimized.spv - * - * Then copy that bytecode to the C++ file - */ - #version 460 core #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_shader_8bit_storage : require diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index df00b57df..70ac7c620 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -85,7 +85,7 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; -HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} HLEMacro::~HLEMacro() = default; std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { @@ -99,8 +99,8 @@ std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) co HLEMacroImpl::~HLEMacroImpl() = default; -HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) - : maxwell3d(maxwell3d), func(func) {} +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) + : maxwell3d{maxwell3d_}, func{func_} {} void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { func(maxwell3d, parameters); diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index 37af875a0..cb3bd1600 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h @@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u3 class HLEMacro { public: - explicit HLEMacro(Engines::Maxwell3D& maxwell3d); + explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); ~HLEMacro(); std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; diff --git a/src/video_core/macro/macro_interpreter.cpp 
b/src/video_core/macro/macro_interpreter.cpp index bd01fd1f2..8da26fd59 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,29 +11,29 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) - : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); } -MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& code) - : maxwell3d(maxwell3d), code(code) {} +MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, + const std::vector<u32>& code_) + : maxwell3d{maxwell3d_}, code{code_} {} -void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) { +void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { MICROPROFILE_SCOPE(MacroInterp); Reset(); - registers[1] = parameters[0]; - num_parameters = parameters.size(); + registers[1] = params[0]; + num_parameters = params.size(); if (num_parameters > parameters_capacity) { parameters_capacity = num_parameters; - this->parameters = std::make_unique<u32[]>(num_parameters); + parameters = std::make_unique<u32[]>(num_parameters); } - std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); + std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); // Execute the code until we hit an exit condition. 
bool keep_executing = true; @@ -133,8 +133,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { break; } default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", - static_cast<u32>(opcode.operation.Value())); + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); } // An instruction with the Exit flag will not actually @@ -182,7 +181,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, return ~(src_a & src_b); default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation)); + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); return 0; } } @@ -230,7 +229,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r Send((result >> 12) & 0b111111); break; default: - UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast<u32>(operation)); + UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); } } diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index 90217fc89..d50c619ce 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h @@ -17,7 +17,7 @@ class Maxwell3D; class MacroInterpreter final : public MacroEngine { public: - explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); protected: std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; @@ -28,8 +28,8 @@ private: class MacroInterpreterImpl : public CachedMacro { public: - MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); - void Execute(const std::vector<u32>& parameters, u32 method) override; + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); + void Execute(const std::vector<u32>& params, u32 method) override; private: /// Resets the execution engine state, zeroing registers, etc. 
@@ -38,9 +38,9 @@ private: /** * Executes a single macro instruction located at the current program counter. Returns whether * the interpreter should keep running. - * @param offset Offset to start execution at. + * * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. + * previous instruction. */ bool Step(bool is_delay_slot); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 954b87515..c6b2b2109 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,15 +28,15 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) - : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { return std::make_unique<MacroJITx64Impl>(maxwell3d, code); } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code) - : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { +MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { Compile(); } @@ -165,8 +165,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { } break; default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", - static_cast<std::size_t>(opcode.alu_operation.Value())); + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); break; } Compile_ProcessResult(opcode.result_operation, opcode.dst); @@ -553,15 +552,15 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { } void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation 
operation, u32 reg) { - const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { + const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // register. - if (reg == 0) { + if (reg_index == 0) { return; } - mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); + mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); }; - const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: @@ -604,7 +603,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 Compile_Send(RESULT); break; default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation)); + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); } } diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index a180e7428..7f50ac2f8 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -23,7 +23,7 @@ constexpr size_t MAX_CODE_SIZE = 0x10000; class MacroJITx64 final : public MacroEngine { public: - explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); + explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); protected: std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; @@ -34,7 +34,7 @@ private: class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { public: - MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); ~MacroJITx64Impl(); void Execute(const std::vector<u32>& parameters, u32 method) override; diff --git 
a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6e70bd362..65feff588 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); + const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index c078193d9..c35e57689 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -28,7 +28,7 @@ public: }; constexpr PageEntry() = default; - constexpr PageEntry(State state) : state{state} {} + constexpr PageEntry(State state_) : state{state_} {} constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} [[nodiscard]] constexpr bool IsUnmapped() const { @@ -68,7 +68,7 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); class MemoryManager final { public: - explicit MemoryManager(Core::System& system); + explicit MemoryManager(Core::System& system_); ~MemoryManager(); /// Binds a renderer to the memory manager. diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 9da9fb4ff..e69de29bb 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -1,250 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <array> -#include <cstring> -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/morton.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCore { - -using Surface::GetBytesPerPixel; -using Surface::PixelFormat; - -using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*); -using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; - -template <bool morton_to_linear, PixelFormat format> -static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, - u32 tile_width_spacing, u8* buffer, u8* addr) { - constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); - - // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual - // pixel values. - constexpr u32 tile_size_x{GetDefaultBlockWidth(format)}; - constexpr u32 tile_size_y{GetDefaultBlockHeight(format)}; - - if constexpr (morton_to_linear) { - Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, - stride, height, depth, block_height, block_depth, - tile_width_spacing); - } else { - Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, - bytes_per_pixel, bytes_per_pixel, addr, buffer, false, - block_height, block_depth, tile_width_spacing); - } -} - -static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, - MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, - MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, - MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, - MortonCopy<true, PixelFormat::R5G6B5_UNORM>, - MortonCopy<true, PixelFormat::B5G6R5_UNORM>, - MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, - MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, - MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, - MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, - MortonCopy<true, PixelFormat::R8_UNORM>, - 
MortonCopy<true, PixelFormat::R8_SNORM>, - MortonCopy<true, PixelFormat::R8_SINT>, - MortonCopy<true, PixelFormat::R8_UINT>, - MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, - MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, - MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, - MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, - MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, - MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, - MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, - MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, - MortonCopy<true, PixelFormat::BC2_UNORM>, - MortonCopy<true, PixelFormat::BC3_UNORM>, - MortonCopy<true, PixelFormat::BC4_UNORM>, - MortonCopy<true, PixelFormat::BC4_SNORM>, - MortonCopy<true, PixelFormat::BC5_UNORM>, - MortonCopy<true, PixelFormat::BC5_SNORM>, - MortonCopy<true, PixelFormat::BC7_UNORM>, - MortonCopy<true, PixelFormat::BC6H_UFLOAT>, - MortonCopy<true, PixelFormat::BC6H_SFLOAT>, - MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, - MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, - MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, - MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, - MortonCopy<true, PixelFormat::R32G32_FLOAT>, - MortonCopy<true, PixelFormat::R32G32_SINT>, - MortonCopy<true, PixelFormat::R32_FLOAT>, - MortonCopy<true, PixelFormat::R16_FLOAT>, - MortonCopy<true, PixelFormat::R16_UNORM>, - MortonCopy<true, PixelFormat::R16_SNORM>, - MortonCopy<true, PixelFormat::R16_UINT>, - MortonCopy<true, PixelFormat::R16_SINT>, - MortonCopy<true, PixelFormat::R16G16_UNORM>, - MortonCopy<true, PixelFormat::R16G16_FLOAT>, - MortonCopy<true, PixelFormat::R16G16_UINT>, - MortonCopy<true, PixelFormat::R16G16_SINT>, - MortonCopy<true, PixelFormat::R16G16_SNORM>, - MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, - MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, - MortonCopy<true, PixelFormat::R8G8_UNORM>, - MortonCopy<true, PixelFormat::R8G8_SNORM>, - MortonCopy<true, PixelFormat::R8G8_SINT>, - MortonCopy<true, 
PixelFormat::R8G8_UINT>, - MortonCopy<true, PixelFormat::R32G32_UINT>, - MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, - MortonCopy<true, PixelFormat::R32_UINT>, - MortonCopy<true, PixelFormat::R32_SINT>, - MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, - MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, - MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, - MortonCopy<true, PixelFormat::BC2_SRGB>, - MortonCopy<true, PixelFormat::BC3_SRGB>, - MortonCopy<true, PixelFormat::BC7_SRGB>, - MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, - MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, - MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, - MortonCopy<true, PixelFormat::D32_FLOAT>, - MortonCopy<true, PixelFormat::D16_UNORM>, - MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, - MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, - MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, -}; - -static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, - 
MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, - MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, - MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, - MortonCopy<false, PixelFormat::R5G6B5_UNORM>, - MortonCopy<false, PixelFormat::B5G6R5_UNORM>, - MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, - MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, - MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, - MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, - MortonCopy<false, PixelFormat::R8_UNORM>, - MortonCopy<false, PixelFormat::R8_SNORM>, - MortonCopy<false, PixelFormat::R8_SINT>, - MortonCopy<false, PixelFormat::R8_UINT>, - MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, - MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, - MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, - MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, - MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, - MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, - MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, - MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, - MortonCopy<false, PixelFormat::BC2_UNORM>, - MortonCopy<false, PixelFormat::BC3_UNORM>, - MortonCopy<false, PixelFormat::BC4_UNORM>, - MortonCopy<false, PixelFormat::BC4_SNORM>, - MortonCopy<false, PixelFormat::BC5_UNORM>, - MortonCopy<false, PixelFormat::BC5_SNORM>, - MortonCopy<false, PixelFormat::BC7_UNORM>, - MortonCopy<false, PixelFormat::BC6H_UFLOAT>, - MortonCopy<false, PixelFormat::BC6H_SFLOAT>, - // TODO(Subv): Swizzling ASTC formats are not supported - nullptr, - MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, - MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, - MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, - MortonCopy<false, PixelFormat::R32G32_FLOAT>, - MortonCopy<false, PixelFormat::R32G32_SINT>, - MortonCopy<false, PixelFormat::R32_FLOAT>, - MortonCopy<false, PixelFormat::R16_FLOAT>, - MortonCopy<false, PixelFormat::R16_UNORM>, - MortonCopy<false, PixelFormat::R16_SNORM>, - MortonCopy<false, PixelFormat::R16_UINT>, - 
MortonCopy<false, PixelFormat::R16_SINT>, - MortonCopy<false, PixelFormat::R16G16_UNORM>, - MortonCopy<false, PixelFormat::R16G16_FLOAT>, - MortonCopy<false, PixelFormat::R16G16_UINT>, - MortonCopy<false, PixelFormat::R16G16_SINT>, - MortonCopy<false, PixelFormat::R16G16_SNORM>, - MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, - MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, - MortonCopy<false, PixelFormat::R8G8_UNORM>, - MortonCopy<false, PixelFormat::R8G8_SNORM>, - MortonCopy<false, PixelFormat::R8G8_SINT>, - MortonCopy<false, PixelFormat::R8G8_UINT>, - MortonCopy<false, PixelFormat::R32G32_UINT>, - MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, - MortonCopy<false, PixelFormat::R32_UINT>, - MortonCopy<false, PixelFormat::R32_SINT>, - nullptr, - nullptr, - nullptr, - MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, - MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, - MortonCopy<false, PixelFormat::BC2_SRGB>, - MortonCopy<false, PixelFormat::BC3_SRGB>, - MortonCopy<false, PixelFormat::BC7_SRGB>, - MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, - MortonCopy<false, PixelFormat::D32_FLOAT>, - MortonCopy<false, PixelFormat::D16_UNORM>, - MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, - MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, - MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, -}; - -static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { - switch (mode) { - case MortonSwizzleMode::MortonToLinear: - return morton_to_linear_fns[static_cast<std::size_t>(format)]; - case MortonSwizzleMode::LinearToMorton: - return linear_to_morton_fns[static_cast<std::size_t>(format)]; - } - UNREACHABLE(); - return morton_to_linear_fns[static_cast<std::size_t>(format)]; -} - -void 
MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr) { - GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, - tile_width_spacing, buffer, addr); -} - -} // namespace VideoCore diff --git a/src/video_core/morton.h b/src/video_core/morton.h index b714a7e3f..e69de29bb 100644 --- a/src/video_core/morton.h +++ b/src/video_core/morton.h @@ -1,18 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" -#include "video_core/surface.h" - -namespace VideoCore { - -enum class MortonSwizzleMode { MortonToLinear, LinearToMorton }; - -void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, - u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, - u8* buffer, u8* addr); - -} // namespace VideoCore diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index fc54ca0ef..203f2af05 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -28,8 +28,8 @@ namespace VideoCommon { template <class QueryCache, class HostCounter> class CounterStreamBase { public: - explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) - : cache{cache}, type{type} {} + explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) + : cache{cache_}, type{type_} {} /// Updates the state of the stream, enabling or disabling as needed. 
void Update(bool enabled) { @@ -334,8 +334,8 @@ private: template <class HostCounter> class CachedQueryBase { public: - explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) - : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} + explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_) + : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {} virtual ~CachedQueryBase() = default; CachedQueryBase(CachedQueryBase&&) noexcept = default; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 27ef4c69a..0cb0f387d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -76,6 +76,9 @@ public: /// Sync memory between guest and host. virtual void SyncGuestHost() = 0; + /// Unmap memory range + virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; @@ -83,6 +86,12 @@ public: /// Notify the host renderer to wait for previous primitive and compute operations. virtual void WaitForIdle() = 0; + /// Notify the host renderer to wait for reads and writes to render targets and flush caches. + virtual void FragmentBarrier() = 0; + + /// Notify the host renderer to make available previous render target writes. + virtual void TiledCacheBarrier() = 0; + /// Notify the rasterizer to send all written commands to the host GPU. 
virtual void FlushCommands() = 0; @@ -91,8 +100,7 @@ public: /// Attempt to use a faster method to perform a surface copy [[nodiscard]] virtual bool AccelerateSurfaceCopy( - const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { return false; } diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index d6120c23e..3e4d88c30 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) { case PixelImap::Unused: break; } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); return {}; } @@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: return "TRIANGLES_ADJACENCY"; default: - UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); + UNIMPLEMENTED_MSG("topology={}", topology); return "POINTS"; } } @@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { case Tegra::Shader::OutputTopology::TriangleStrip: return "TRIANGLE_STRIP"; default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); + UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); return "points"; } } @@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) { class ARBDecompiler final { public: - explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier); + explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, 
const Registry& registry_, + ShaderType stage_, std::string_view identifier); std::string Code() const { return shader_source; @@ -802,9 +802,9 @@ private: }; }; -ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier) - : device{device}, ir{ir}, registry{registry}, stage{stage} { +ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, + ShaderType stage_, std::string_view identifier) + : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { DefineGlobalMemory(); AddLine("TEMP RC;"); @@ -1134,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } - } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(ast->condition); + } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { + const std::string condition = VisitExpression(if_then->condition); ResetTemporaries(); AddLine("MOVC.U RC.x, {};", condition); AddLine("IF NE.x;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("ENDIF;"); - } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { + } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { AddLine("ELSE;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } - } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { - VisitBlock(ast->nodes); - } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { - 
AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); + } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { + VisitBlock(decoded->nodes); + } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { + AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); ResetTemporaries(); - } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(ast->condition); + } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { + const std::string condition = VisitExpression(do_while->condition); ResetTemporaries(); AddLine("REP;"); - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { + for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { VisitAST(current); } AddLine("MOVC.U RC.x, {};", condition); AddLine("BRK (NE.x);"); AddLine("ENDREP;"); - } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast->condition); + } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { + const bool is_true = ExprIsTrue(ast_return->condition); if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); + AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); AddLine("IF NE.x;"); ResetTemporaries(); } - if (ast->kills) { + if (ast_return->kills) { AddLine("KIL TR;"); } else { Exit(); @@ -1179,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { if (!is_true) { AddLine("ENDIF;"); } - } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { - if (ExprIsTrue(ast->condition)) { + } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { + if (ExprIsTrue(ast_break->condition)) { AddLine("BRK;"); } else { - AddLine("MOVC.U RC.x, {};", 
VisitExpression(ast->condition)); + AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); AddLine("BRK (NE.x);"); ResetTemporaries(); } @@ -1351,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) { GetGenericAttributeIndex(index), swizzle); } } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); + UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); break; } return "{0, 0, 0, 0}.x"; @@ -1485,9 +1485,7 @@ void ARBDecompiler::Exit() { } const auto safe_get_register = [this](u32 reg) -> std::string { - // TODO(Rodrigo): Replace with contains once C++20 releases - const auto& used_registers = ir.GetRegisters(); - if (used_registers.find(reg) != used_registers.end()) { + if (ir.GetRegisters().contains(reg)) { return fmt::format("R{}.x", reg); } return "{0, 0, 0, 0}.x"; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b1c4cd62f..5772cad87 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,11 +22,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) + : BufferBlock{cpu_addr_, size_} { gl_buffer.Create(); - glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); - if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { + glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); + if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, 
GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } @@ -34,14 +34,14 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), - data); +void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { + glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), + static_cast<GLsizeiptr>(data_size), data); } -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { +void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); const GLintptr gl_offset = static_cast<GLintptr>(offset); if (read_buffer.handle == 0) { read_buffer.Create(); @@ -54,17 +54,16 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size) { + std::size_t copy_size) { glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), - static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); } -OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device_, std::size_t stream_size) - : GenericBufferCache{rasterizer, gpu_memory, cpu_memory, - std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, - device{device_} { +OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + const Device& device_, OGLStreamBuffer& stream_buffer_, + StateTracker& 
state_tracker) + : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { if (!device.HasFastBufferSubData()) { return; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f75b32e31..17ee90316 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -22,18 +22,19 @@ namespace OpenGL { class Device; class OGLStreamBuffer; class RasterizerOpenGL; +class StateTracker; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); + explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data); + void Upload(std::size_t offset, std::size_t data_size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data); + void Download(std::size_t offset, std::size_t data_size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size); + std::size_t copy_size); GLuint Handle() const noexcept { return gl_buffer.handle; @@ -54,7 +55,8 @@ class OGLBufferCache final : public GenericBufferCache { public: explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, std::size_t stream_size); + const Device& device, OGLStreamBuffer& stream_buffer, + StateTracker& state_tracker); ~OGLBufferCache(); BufferInfo GetEmptyBuffer(std::size_t) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a94e4f72e..b24179d59 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,9 +5,11 @@ #include <algorithm> #include <array> #include <cstddef> +#include <cstdlib> #include 
<cstring> #include <limits> #include <optional> +#include <span> #include <vector> #include <glad/glad.h> @@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1; constexpr u32 NumStages = 5; -constexpr std::array LimitUBOs = { +constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, - GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; - -constexpr std::array LimitSSBOs = { + GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, +}; +constexpr std::array LIMIT_SSBOS = { GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; - -constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; - -constexpr std::array LimitImages = { + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, +}; +constexpr std::array LIMIT_SAMPLERS = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS, + GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, +}; +constexpr std::array LIMIT_IMAGES = { GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; + GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, +}; template <typename T> T GetInteger(GLenum pname) { @@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() { return extensions; } -bool 
HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { - return std::find(images.begin(), images.end(), extension) != images.end(); +bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) { + return std::ranges::find(extensions, extension) != extensions.end(); } u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { @@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { std::array<u32, Tegra::Engines::MaxShaderTypes> max; - std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), - [](GLenum pname) { return GetInteger<u32>(pname); }); + std::ranges::transform(LIMIT_UBOS, max.begin(), + [](GLenum pname) { return GetInteger<u32>(pname); }); return max; } @@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin for (std::size_t i = 0; i < NumStages; ++i) { const std::size_t stage = stage_swizzle[i]; bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, + LIMIT_SAMPLERS[stage])}; } u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); @@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin // Reserve at least 4 image bindings on the fragment stage. 
bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); + Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); // This is guaranteed to be at least 1. const u32 total_extracted_images = num_images / (NumStages - 1); @@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin continue; } bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); } // Compute doesn't care about any of this. @@ -188,6 +193,11 @@ bool IsASTCSupported() { return true; } +[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); +} + } // Anonymous namespace Device::Device() @@ -206,9 +216,8 @@ Device::Device() "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - - uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); - shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); + uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); @@ -224,6 +233,7 @@ Device::Device() has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + has_debugging_tool_attached = IsDebugToolAttached(extensions); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8a4b6b9fc..13e66846c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -36,11 +36,11 @@ public: return GetBaseBindings(static_cast<std::size_t>(shader_type)); } - std::size_t GetUniformBufferAlignment() const { + size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } - std::size_t GetShaderStorageBufferAlignment() const { + size_t GetShaderStorageBufferAlignment() const { return shader_storage_alignment; } @@ -104,6 +104,10 @@ public: return has_nv_viewport_array2; } + bool HasDebuggingToolAttached() const { + return has_debugging_tool_attached; + } + bool UseAssemblyShaders() const { return use_assembly_shaders; } @@ -118,8 +122,8 @@ private: std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; std::array<BaseBindings, 
Tegra::Engines::MaxShaderTypes> base_bindings{}; - std::size_t uniform_buffer_alignment{}; - std::size_t shader_storage_alignment{}; + size_t uniform_buffer_alignment{}; + size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; @@ -135,6 +139,7 @@ private: bool has_precise_bug{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index b532fdcc2..3e9c922f5 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -11,10 +11,10 @@ namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} +GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} -GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) - : FenceBase(address, payload, is_stubbed) {} +GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_) + : FenceBase{address_, payload_, is_stubbed_} {} GLInnerFence::~GLInnerFence() = default; @@ -45,10 +45,10 @@ void GLInnerFence::Wait() { glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); } -FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - TextureCacheOpenGL& texture_cache, - OGLBufferCache& buffer_cache, QueryCache& query_cache) - : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {} +FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, + Tegra::GPU& gpu_, TextureCache& texture_cache_, + OGLBufferCache& buffer_cache_, QueryCache& query_cache_) + : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} Fence 
FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<GLInnerFence>(value, is_stubbed); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index da1dcdace..30dbee613 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -17,8 +17,8 @@ namespace OpenGL { class GLInnerFence : public VideoCommon::FenceBase { public: - GLInnerFence(u32 payload, bool is_stubbed); - GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); + explicit GLInnerFence(u32 payload_, bool is_stubbed_); + explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_); ~GLInnerFence(); void Queue(); @@ -33,13 +33,13 @@ private: using Fence = std::shared_ptr<GLInnerFence>; using GenericFenceManager = - VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; + VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; class FenceManagerOpenGL final : public GenericFenceManager { public: - explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, - QueryCache& query_cache); + explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, + QueryCache& query_cache_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp deleted file mode 100644 index b8a512cb6..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <tuple> -#include <unordered_map> -#include <utility> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using VideoCore::Surface::SurfaceType; - -FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; - -FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; - -GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { - const auto [entry, is_cache_miss] = cache.try_emplace(key); - auto& framebuffer{entry->second}; - if (is_cache_miss) { - framebuffer = CreateFramebuffer(key); - } - return framebuffer.handle; -} - -OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { - OGLFramebuffer framebuffer; - framebuffer.Create(); - - // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); - - if (key.zeta) { - const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; - const GLenum attach_target = stencil ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; - key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - } - - std::size_t num_buffers = 0; - std::array<GLenum, Maxwell::NumRenderTargets> targets; - - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (!key.colors[index]) { - targets[index] = GL_NONE; - continue; - } - const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index); - key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER); - - const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111; - targets[index] = GL_COLOR_ATTACHMENT0 + attachment; - num_buffers = index + 1; - } - - if (num_buffers > 0) { - glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets)); - } else { - glDrawBuffer(GL_NONE); - } - - return framebuffer; -} - -std::size_t FramebufferCacheKey::Hash() const noexcept { - std::size_t hash = std::hash<View>{}(zeta); - for (const auto& color : colors) { - hash ^= std::hash<View>{}(color); - } - hash ^= static_cast<std::size_t>(color_attachments) << 16; - return hash; -} - -bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(colors, zeta, color_attachments) == - std::tie(rhs.colors, rhs.zeta, rhs.color_attachments); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h deleted file mode 100644 index 8f698fee0..000000000 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <cstddef> -#include <unordered_map> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace OpenGL { - -constexpr std::size_t BitsPerAttachment = 4; - -struct FramebufferCacheKey { - View zeta; - std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; - u32 color_attachments = 0; - - std::size_t Hash() const noexcept; - - bool operator==(const FramebufferCacheKey& rhs) const noexcept; - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - void SetAttachment(std::size_t index, u32 attachment) { - color_attachments |= attachment << (BitsPerAttachment * index); - } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash<OpenGL::FramebufferCacheKey> { - std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace OpenGL { - -class FramebufferCacheOpenGL { -public: - FramebufferCacheOpenGL(); - ~FramebufferCacheOpenGL(); - - GLuint GetFramebuffer(const FramebufferCacheKey& key); - -private: - OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - - std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 1a3d9720e..acebbf5f4 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -30,11 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { } // Anonymous namespace -QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory) - : VideoCommon::QueryCacheBase<QueryCache, 
CachedQuery, CounterStream, HostCounter>( - rasterizer, maxwell3d, gpu_memory), - gl_rasterizer{rasterizer} {} +QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_) + : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {} QueryCache::~QueryCache() = default; @@ -59,10 +57,11 @@ bool QueryCache::AnyCommandQueued() const noexcept { return gl_rasterizer.AnyCommandQueued(); } -HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, - VideoCore::QueryType type) - : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, - type{type}, query{cache.AllocateQuery(type)} { +HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, + VideoCore::QueryType type_) + : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{ + cache.AllocateQuery( + type)} { glBeginQuery(GetTarget(type), query.handle); } @@ -86,13 +85,14 @@ u64 HostCounter::BlockingQuery() const { return static_cast<u64>(value); } -CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) - : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} +CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_, + u8* host_ptr_) + : CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {} CachedQuery::~CachedQuery() = default; CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept - : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} + : CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { cache = rhs.cache; diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 82cac51ee..7bbe5cfe9 100644 --- 
a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -29,8 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory); + explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_); ~QueryCache(); OGLQuery AllocateQuery(VideoCore::QueryType type); @@ -46,8 +46,8 @@ private: class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { public: - explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, - VideoCore::QueryType type); + explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, + VideoCore::QueryType type_); ~HostCounter(); void EndQuery(); @@ -62,8 +62,8 @@ private: class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { public: - explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, - u8* host_ptr); + explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_, + u8* host_ptr_); ~CachedQuery() override; CachedQuery(CachedQuery&& rhs) noexcept; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cfddbde5d..8aa63d329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,12 +25,15 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include 
"video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { -constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = +constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; +constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = +constexpr size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; + +constexpr size_t MAX_TEXTURES = 192; +constexpr size_t MAX_IMAGES = 48; + +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; template <typename Engine, typename Entry> -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - ShaderType shader_type, std::size_t index = 0) { +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + ShaderType shader_type, size_t index = 0) { if constexpr (std::is_same_v<Entry, SamplerEntry>) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; @@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry const u32 offset_2 = entry.secondary_offset; const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(handle); - } - - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); - if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(shader_type, offset); - } else { - return engine.GetTexture(offset); + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, @@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, if (!entry.IsIndirect()) { return entry.GetSize(); 
} - if (buffer.size > Maxwell::MaxConstBufferSize) { LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, Maxwell::MaxConstBufferSize); @@ -131,7 +142,7 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { case 43: return {GL_BACK_SECONDARY_COLOR_NV, 0}; } - UNIMPLEMENTED_MSG("index={}", static_cast<int>(index)); + UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } @@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss reinterpret_cast<const GLuint*>(ssbos)); } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return ImageViewType::Buffer; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, - Core::Memory::Memory& cpu_memory, const Device& device_, +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory_, const Device& device_, ScreenInfo& screen_info_, ProgramManager& program_manager_, StateTracker& state_tracker_) - : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), - shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device), + stream_buffer(device, state_tracker), + texture_cache_runtime(device, program_manager, state_tracker), + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), + shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory, 
device, STREAM_BUFFER_SIZE), + buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window) { - CheckExtensions(); - + async_shaders(emu_window_) { unified_uniform_buffer.Create(); glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); @@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra: nullptr, 0); } } - if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); } @@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::CheckExtensions() { - if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { - LOG_WARNING( - Render_OpenGL, - "Anisotropic filter is not supported! This can cause graphical issues in some games."); - } -} - void RasterizerOpenGL::SetupVertexFormat() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { @@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { return info.offset; } -void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { +void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); u32 clip_distances = 0; + std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; + image_view_indices.clear(); + sampler_handles.clear(); + + texture_cache.SynchronizeGraphicsDescriptors(); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } continue; } - // Currently this stages are not supported in the OpenGL backend. 
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL if (program == Maxwell::ShaderProgram::TesselationControl || @@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, shader_config.enable.Value(), shader_config.offset); + break; } // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; + const size_t stage = index == 0 ? 0 : index - 1; + shaders[stage] = shader; + SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + SetupDrawTextures(shader, stage); + SetupDrawImages(shader, stage); // Workaround for Intel drivers. 
// When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { ++index; } } - SyncClipEnabled(clip_distances); maxwell3d.dirty.flags[Dirty::Shaders] = false; + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader* const shader = shaders[stage]; + if (shader) { + const auto base = device.GetBaseBindings(stage); + BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, + texture_index, image_index); + } + } } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s shader_cache.LoadDiskCache(title_id, stop_loading, callback); } -void RasterizerOpenGL::ConfigureFramebuffers() { - MICROPROFILE_SCOPE(OpenGL_Framebuffer); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { - return; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - View depth_surface = texture_cache.GetDepthBufferSurface(true); - - const auto& regs = maxwell3d.regs; - UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); - - // Bind the framebuffer surfaces - FramebufferCacheKey key; - const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); - for (std::size_t index = 0; index < colors_count; ++index) { - View color_surface{texture_cache.GetColorBufferSurface(index, true)}; - if (!color_surface) { - continue; - } - // Assume that a surface will be written to if it is used as a framebuffer, even - // if the shader doesn't actually write to it. 
- texture_cache.MarkColorBufferInUse(index); - - key.SetAttachment(index, regs.rt_control.GetMap(index)); - key.colors[index] = std::move(color_surface); - } - - if (depth_surface) { - // Assume that a surface will be written to if it is used as a framebuffer, even if - // the shader doesn't actually write to it. - texture_cache.MarkDepthBufferInUse(); - key.zeta = std::move(depth_surface); - } - - texture_cache.GuardRenderTargets(false); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - -void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - const auto& regs = maxwell3d.regs; - - texture_cache.GuardRenderTargets(true); - View color_surface; - - if (using_color) { - // Determine if we have to preserve the contents. - // First we have to make sure all clear masks are enabled. - bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G || - !regs.clear_buffers.B || !regs.clear_buffers.A; - const std::size_t index = regs.clear_buffers.RT; - if (regs.clear_flags.scissor) { - // Then we have to confirm scissor testing clears the whole image. - const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.rt[index].width; - preserve_contents |= scissor.max_y < regs.rt[index].height; - } - - color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents); - texture_cache.MarkColorBufferInUse(index); - } - - View depth_surface; - if (using_depth_stencil) { - bool preserve_contents = false; - if (regs.clear_flags.scissor) { - // For depth stencil clears we only have to confirm scissor test covers the whole image. 
- const auto& scissor = regs.scissor_test[0]; - preserve_contents |= scissor.min_x > 0; - preserve_contents |= scissor.min_y > 0; - preserve_contents |= scissor.max_x < regs.zeta_width; - preserve_contents |= scissor.max_y < regs.zeta_height; - } - - depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); - texture_cache.MarkDepthBufferInUse(); - } - texture_cache.GuardRenderTargets(false); - - FramebufferCacheKey key; - key.colors[0] = std::move(color_surface); - key.zeta = std::move(depth_surface); - - state_tracker.NotifyFramebuffer(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); -} - void RasterizerOpenGL::Clear() { if (!maxwell3d.ShouldExecute()) { return; @@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() { regs.clear_buffers.A) { use_color = true; - state_tracker.NotifyColorMask0(); - glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + const GLuint index = regs.clear_buffers.RT; + state_tracker.NotifyColorMask(index); + glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); // TODO(Rodrigo): Determine if clamping is used on clears @@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() { state_tracker.NotifyScissor0(); glDisablei(GL_SCISSOR_TEST, 0); } - UNIMPLEMENTED_IF(regs.clear_flags.viewport); - ConfigureClearFramebuffer(use_color, use_depth || use_stencil); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + } if (use_color) { - glClearBufferfv(GL_COLOR, 0, regs.clear_color); + glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } - if (use_depth && use_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); } else if (use_depth) { @@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { (Maxwell::MaxConstBufferSize + 
device.GetUniformBufferAlignment()); // Prepare the vertex array. - const bool invalidated = buffer_cache.Map(buffer_size); - - if (invalidated) { - // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::VertexBuffers] = true; - for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { - dirty[index] = true; - } - } + buffer_cache.Map(buffer_size); // Prepare vertex array format. SetupVertexFormat(); @@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } // Setup shaders and their used resources. - texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - SetupShaders(primitive_mode); - texture_cache.GuardSamplers(false); - - ConfigureFramebuffers(); + auto lock = texture_cache.AcquireLock(); + SetupShaders(); // Signal the buffer cache that we are not going to upload more things. 
buffer_cache.Unmap(); - + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); program_manager.BindGraphicsPipeline(); - if (texture_cache.TextureBarrier()) { - glTextureBarrier(); - } - + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); @@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Acquire(); current_cbuf = 0; - auto kernel = shader_cache.GetComputeKernel(code_addr); - program_manager.BindCompute(kernel->GetHandle()); + Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - SetupComputeTextures(kernel); - SetupComputeImages(kernel); + auto lock = texture_cache.AcquireLock(); + BindComputeTextures(kernel); - const std::size_t buffer_size = - Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); + const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); SetupComputeConstBuffers(kernel); @@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = kepler_compute.launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -769,7 +725,8 @@ bool 
RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } shader_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); shader_cache.SyncGuestHost(); } +void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + shader_cache.OnCPUWrite(addr, size); +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); @@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() { GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); } +void RasterizerOpenGL::FragmentBarrier() { + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); +} + +void RasterizerOpenGL::TiledCacheBarrier() { + glTextureBarrier(); +} + void RasterizerOpenGL::FlushCommands() 
{ // Only flush when we have commands queued to OpenGL. if (num_queued_commands == 0) { @@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() { // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. num_queued_commands = 0; + fence_manager.TickFrame(); buffer_cache.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { - if (!framebuffer_addr) { - return {}; + if (framebuffer_addr == 0) { + return false; } - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { - return {}; + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; + if (!image_view) { + return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); + // 
ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - if (params.pixel_format != pixel_format) { - LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); - } + screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); + return true; +} - screen_info.display_texture = surface->GetTexture(); - screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; +void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { + image_view_indices.clear(); + sampler_handles.clear(); - return true; + texture_cache.SynchronizeComputeDescriptors(); + + SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + program_manager.BindCompute(kernel->GetHandle()); + size_t image_view_index = 0; + size_t texture_index = 0; + size_t image_index = 0; + BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); +} + +void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, + GLuint base_image, size_t& image_view_index, + size_t& texture_index, size_t& image_index) { + const GLuint* const samplers = sampler_handles.data() + texture_index; + const GLuint* const textures = texture_handles.data() + texture_index; + const GLuint* const images = image_handles.data() + image_index; + + const size_t num_samplers = entries.samplers.size(); + for (const auto& sampler : entries.samplers) { + for (size_t i = 0; i < sampler.size; ++i) { + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); + texture_handles[texture_index++] = handle; + } + } + const size_t 
num_images = entries.images.size(); + for (size_t unit = 0; unit < num_images; ++unit) { + // TODO: Mark as modified + const ImageViewId image_view_id = image_view_ids[image_view_index++]; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); + image_handles[image_index] = handle; + ++image_index; + } + if (num_samplers > 0) { + glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); + glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); + } + if (num_images > 0) { + glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); + } } void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { @@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; @@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e } } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = device.GetBaseBindings(stage_index).sampler; +void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { + const bool via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast<ShaderType>(stage_index); - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const auto 
handle = + GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); + const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { - MICROPROFILE_SCOPE(OpenGL_Texture); - u32 binding = 0; +void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : kernel->GetEntries().samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); + for (size_t i = 0; i < entry.size; ++i) { + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); + const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + image_view_indices.push_back(handle.image); } } } -void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); - if (!view) { - // Can occur when texture addr is null or its memory is unmapped/invalid - glBindSampler(binding, 0); - glBindTextureUnit(binding, 0); - return; - } - const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - glBindTextureUnit(binding, handle); - if (!view->GetSurfaceParams().IsBuffer()) { - glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); - } -} - -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - u32 binding = device.GetBaseBindings(stage_index).image; +void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { + const bool 
via_header_index = + maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : shader->GetEntries().images) { const auto shader_type = static_cast<ShaderType>(stage_index); - const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(binding++, tic, entry); + const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - u32 binding = 0; +void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; - SetupImage(binding++, tic, entry); + const auto handle = + GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); + image_view_indices.push_back(handle.image); } } -void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const ImageEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - if (!view) { - glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); - return; - } - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); -} - void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; @@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() { flags[Dirty::PointSize] = false; oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); + oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - if (maxwell3d.regs.vp_point_size.enable) { - // By definition of GL_POINT_SIZE, it only matters if 
GL_PROGRAM_POINT_SIZE is disabled. - glEnable(GL_PROGRAM_POINT_SIZE); - return; - } - - // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid - // in OpenGL). glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); - glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1d0f585fa..82e03e677 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,12 +7,13 @@ #include <array> #include <atomic> #include <cstddef> -#include <map> #include <memory> #include <optional> #include <tuple> #include <utility> +#include <boost/container/static_vector.hpp> + #include <glad/glad.h> #include "common/common_types.h" @@ -23,16 +24,14 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" -#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" @@ -51,7 +50,7 @@ class MemoryManager; namespace OpenGL { struct ScreenInfo; -struct DrawParameters; +struct ShaderEntries; struct BindlessSSBO { GLuint64EXT address; @@ -62,10 +61,10 @@ static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); class 
RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, - Core::Memory::Memory& cpu_memory, const Device& device, - ScreenInfo& screen_info, ProgramManager& program_manager, - StateTracker& state_tracker); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory_, const Device& device_, + ScreenInfo& screen_info_, ProgramManager& program_manager_, + StateTracker& state_tracker_); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -79,15 +78,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void UnmapMemory(VAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -108,11 +110,14 @@ public: } private: - /// Configures the color and depth framebuffer states. 
- void ConfigureFramebuffers(); + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + void BindComputeTextures(Shader* kernel); - /// Configures the color and depth framebuffer for clearing. - void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); + void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, + size_t& image_view_index, size_t& texture_index, size_t& image_index); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); @@ -136,23 +141,16 @@ private: size_t size, BindlessSSBO* ssbo); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, Shader* shader); + void SetupDrawTextures(const Shader* shader, size_t stage_index); /// Configures the textures used in a compute shader. - void SetupComputeTextures(Shader* kernel); - - /// Configures a texture. - void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry); + void SetupComputeTextures(const Shader* kernel); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, Shader* shader); + void SetupDrawImages(const Shader* shader, size_t stage_index); /// Configures images in a compute shader. - void SetupComputeImages(Shader* shader); - - /// Configures an image. 
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + void SetupComputeImages(const Shader* shader); /// Syncs the viewport and depth range to match the guest state void SyncViewport(); @@ -227,9 +225,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - /// Check for extension that are not strictly required but are needed for correct emulation - void CheckExtensions(); - std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; @@ -242,7 +237,7 @@ private: GLintptr SetupIndexBuffer(); - void SetupShaders(GLenum primitive_mode); + void SetupShaders(); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -254,19 +249,21 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - TextureCacheOpenGL texture_cache; + OGLStreamBuffer stream_buffer; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; ShaderCacheOpenGL shader_cache; - SamplerCacheOpenGL sampler_cache; - FramebufferCacheOpenGL framebuffer_cache; QueryCache query_cache; OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; VideoCommon::Shader::AsyncShaders async_shaders; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - - GLint vertex_binding = 0; + boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; + std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; + boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; + std::array<GLuint, MAX_TEXTURES> texture_handles; + std::array<GLuint, MAX_IMAGES> image_handles; std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> transform_feedback_buffers; @@ -280,7 +277,7 @@ private: std::size_t current_cbuf = 0; OGLBuffer unified_uniform_buffer; - /// Number of commands queued to the OpenGL driver. Reseted on flush. + /// Number of commands queued to the OpenGL driver. Resetted on flush. 
std::size_t num_queued_commands = 0; u32 last_clip_distance_mask = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0ebcec427..0e34a0f20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -71,7 +71,7 @@ void OGLSampler::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenSamplers(1, &handle); + glCreateSamplers(1, &handle); } void OGLSampler::Release() { diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp deleted file mode 100644 index 5c174879a..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/logging/log.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_sampler_cache.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - -namespace OpenGL { - -SamplerCacheOpenGL::SamplerCacheOpenGL() = default; - -SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; - -OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - OGLSampler sampler; - sampler.Create(); - - const GLuint sampler_id{sampler.handle}; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, 
MaxwellToGL::WrapMode(tsc.wrap_p)); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); - } else { - LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); - } - - return sampler; -} - -GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { - return sampler.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h deleted file mode 100644 index 34ee37f00..000000000 --- a/src/video_core/renderer_opengl/gl_sampler_cache.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <glad/glad.h> - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/sampler_cache.h" - -namespace OpenGL { - -class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { -public: - explicit SamplerCacheOpenGL(); - ~SamplerCacheOpenGL(); - -protected: - OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - GLuint ToSamplerType(const OGLSampler& sampler) const override; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bd56bed0c..d4841fdb7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -27,7 +27,6 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -198,10 +197,10 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built) +Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, + ProgramSharedPtr program_, bool is_built_) : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built(is_built) { + is_built{is_built_} { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; @@ -318,14 +317,13 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); } 
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_) - : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, device{device_} {} + : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; @@ -460,7 +458,7 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set<GLenum>& supported_formats) { - if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { + if (!supported_formats.contains(precompiled_entry.binary_format)) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 1708af06a..2aed0697e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -108,7 +108,7 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built = true); + ProgramSharedPtr program, bool is_built_ = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; @@ -119,10 +119,11 @@ private: class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { 
public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window, - Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device); + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, + Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; /// Loads disk cache for the current game diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 95ca96c8e..2e1fa252d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::TextureType; -using VideoCommon::Shader::BuildTransformFeedback; -using VideoCommon::Shader::Registry; -using namespace std::string_literals; using namespace VideoCommon::Shader; +using namespace std::string_literals; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; @@ -131,7 +129,7 @@ private: class Expression final { public: - Expression(std::string code, Type type) : code{std::move(code)}, type{type} { + Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { ASSERT(type != Type::Void); } Expression() : type{Type::Void} {} @@ -148,8 +146,8 @@ public: ASSERT(type == Type::Void); } - std::string As(Type type) const { - switch (type) { + std::string As(Type type_) const { + switch (type_) { case Type::Bool: return AsBool(); case Type::Bool2: @@ -316,7 +314,7 @@ std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology t case 
Maxwell::PrimitiveTopology::TriangleStripAdjacency: return {"triangles_adjacency", 6}; default: - UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); + UNIMPLEMENTED_MSG("topology={}", topology); return {"points", 1}; } } @@ -342,7 +340,7 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { case Tegra::Shader::OutputTopology::TriangleStrip: return "triangle_strip"; default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); + UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); return "points"; } } @@ -418,11 +416,12 @@ struct GenericVaryingDescription { class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, std::string_view suffix) - : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, - suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ - UseUnifiedUniforms(device, ir, stage)} { + explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, + ShaderType stage_, std::string_view identifier_, + std::string_view suffix_) + : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, + suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ + UseUnifiedUniforms(device_, ir_, stage_)} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -744,7 +743,7 @@ private: case PixelImap::Unused: break; } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); return {}; } @@ -777,16 +776,16 @@ private: name = "gs_" + name + "[]"; } - std::string suffix; + std::string suffix_; if (stage == ShaderType::Fragment) { const auto input_mode{header.ps.GetPixelImap(location)}; if (input_mode == PixelImap::Unused) { return; } - 
suffix = GetInputFlags(input_mode); + suffix_ = GetInputFlags(input_mode); } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); + code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); } void DeclareOutputAttributes() { @@ -877,7 +876,7 @@ private: } u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto [index, info] : ir.GetConstantBuffers()) { + for (const auto& [index, info] : ir.GetConstantBuffers()) { const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, @@ -1251,7 +1250,7 @@ private: } break; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); return {"0", Type::Int}; } @@ -1331,7 +1330,7 @@ private: GetSwizzle(element)), Type::Float}}; } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); + UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); return std::nullopt; } } @@ -2100,13 +2099,13 @@ private: const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; const bool separate_dc = meta.sampler.is_shadow; - std::vector<TextureIR> ir; + std::vector<TextureIR> ir_; if (meta.sampler.is_shadow) { - ir = {TextureOffset{}}; + ir_ = {TextureOffset{}}; } else { - ir = {TextureOffset{}, TextureArgument{type, meta.component}}; + ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; } - return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element), + return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), Type::Float}; } @@ -2752,11 +2751,11 @@ private: } } - std::string GetSampler(const Sampler& sampler) const { + std::string GetSampler(const SamplerEntry& sampler) const { return AppendSuffix(sampler.index, "sampler"); } - std::string GetImage(const Image& image) const { + std::string GetImage(const ImageEntry& image) const { return AppendSuffix(image.index, "image"); } @@ -2801,7 +2800,7 @@ std::string GetFlowVariable(u32 index) { class ExprDecompiler { public: - explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ExprAnd& expr) { inner += '('; @@ -2856,7 +2855,7 @@ private: class ASTDecompiler { public: - explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 451c9689a..be68994bb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,13 +20,13 @@ namespace OpenGL { class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::Sampler; -using ImageEntry = VideoCommon::Shader::Image; +using SamplerEntry = 
VideoCommon::Shader::SamplerEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: - explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index) - : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {} + explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) + : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} u32 GetIndex() const { return index; @@ -37,10 +37,10 @@ private: }; struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, - bool is_written) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ - is_written} {} + constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, + bool is_written_) + : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ + is_written_} {} u32 cbuf_index = 0; u32 cbuf_offset = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 70dd0c3c6..955b2abc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -343,7 +343,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { } const u64 id = entry.unique_identifier; - if (stored_transferable.find(id) != stored_transferable.end()) { + if (stored_transferable.contains(id)) { // The shader already exists return; } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 691c6c79b..553e6e8d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() { } } +void ProgramManager::BindHostCompute(GLuint program) { + if 
(use_assembly_programs) { + glDisable(GL_COMPUTE_PROGRAM_NV); + } + glUseProgram(program); + is_graphics_bound = false; +} + +void ProgramManager::RestoreGuestCompute() { + if (use_assembly_programs) { + glEnable(GL_COMPUTE_PROGRAM_NV); + glUseProgram(0); + } +} + void ProgramManager::UseVertexShader(GLuint program) { if (use_assembly_programs) { BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 950e0dfcb..ad42cce74 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,6 +45,12 @@ public: /// Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); + /// Binds an OpenGL GLSL program object unsynchronized with the guest state. + void BindHostCompute(GLuint program); + + /// Rewinds BindHostCompute state changes. + void RestoreGuestCompute(); + void UseVertexShader(GLuint program); void UseGeometryShader(GLuint program); void UseFragmentShader(GLuint program); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 6bcf831f2..60e6fa39f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -13,7 +13,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace OpenGL { @@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} } } +void StateTracker::InvalidateStreamBuffer() { + flags[Dirty::VertexBuffers] = true; + for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { + flags[index] = true; + } +} + } // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 9d127548f..574615d3c 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -92,6 +92,8 @@ class StateTracker { public: explicit StateTracker(Tegra::GPU& gpu); + void InvalidateStreamBuffer(); + void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { return; @@ -100,6 +102,14 @@ public: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); } + void BindFramebuffer(GLuint new_framebuffer) { + if (framebuffer == new_framebuffer) { + return; + } + framebuffer = new_framebuffer; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); + } + void NotifyScreenDrawVertexArray() { flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; @@ -129,9 +139,9 @@ public: flags[OpenGL::Dirty::Scissor0] = true; } - void NotifyColorMask0() { + void NotifyColorMask(size_t index) { flags[OpenGL::Dirty::ColorMasks] = true; - flags[OpenGL::Dirty::ColorMask0] = true; + flags[OpenGL::Dirty::ColorMask0 + index] = true; } void NotifyBlend0() { @@ -190,6 +200,7 @@ public: private: Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; + GLuint framebuffer = 0; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 887995cf4..e0819cdf2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, 
"OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) - : buffer_size(size) { +OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) + : state_tracker{state_tracker_} { gl_buffer.Create(); - GLsizeiptr allocate_size = size; - if (vertex_data_usage) { - // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer - // read position is near the end and is an out-of-bound access to the vertex buffer. This is - // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the - // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the - // crash. - allocate_size *= 2; - } - static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); mapped_ptr = static_cast<u8*>( - glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); @@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() { gl_buffer.Release(); } -std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= buffer_size); - ASSERT(alignment <= buffer_size); +std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { + ASSERT(size <= BUFFER_SIZE); + ASSERT(alignment <= BUFFER_SIZE); mapped_size = size; if (alignment > 0) { buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); } - bool invalidate = false; - if (buffer_pos + size > buffer_size) { + if (buffer_pos + size > BUFFER_SIZE) { 
MICROPROFILE_SCOPE(OpenGL_StreamBuffer); glInvalidateBufferData(gl_buffer.handle); + state_tracker.InvalidateStreamBuffer(); buffer_pos = 0; - invalidate = true; } - return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); + return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 307a67113..dd9cf67eb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -4,29 +4,31 @@ #pragma once -#include <tuple> +#include <utility> + #include <glad/glad.h> + #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { class Device; +class StateTracker; class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); + explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); ~OGLStreamBuffer(); /* * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes * and the optional alignment requirement. * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. + * The return values are the pointer to the new chunk, and the offset within the buffer. * The actual used size must be specified on unmapping the chunk. 
*/ - std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); + std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); void Unmap(GLsizeiptr size); @@ -39,15 +41,18 @@ public: } GLsizeiptr Size() const noexcept { - return buffer_size; + return BUFFER_SIZE; } private: + static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; + + StateTracker& state_tracker; + OGLBuffer gl_buffer; GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; - GLsizeiptr buffer_size = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a863ef218..4c690418c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -2,173 +2,238 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/assert.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include <algorithm> +#include <array> +#include <bit> +#include <string> + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/surface_base.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/convert.h" 
-#include "video_core/textures/texture.h" +#include "video_core/textures/decoders.h" namespace OpenGL { -using Tegra::Texture::SwizzleSource; -using VideoCore::MortonSwizzleMode; +namespace { +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureMipmapFilter; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCommon::CalculateLevelStrideAlignment; +using VideoCommon::ImageCopy; +using VideoCommon::ImageFlagBits; +using VideoCommon::ImageType; +using VideoCommon::NUM_RT; +using VideoCommon::SamplesLog2; +using VideoCommon::SwizzleParameters; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatSRGB; +using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", - MP_RGB(128, 192, 128)); +struct CopyOrigin { + GLint level; + GLint x; + GLint y; + GLint z; +}; -namespace { +struct CopyRegion { + GLsizei width; + GLsizei height; + GLsizei depth; +}; struct FormatTuple { GLenum internal_format; GLenum format = GL_NONE; GLenum type = GL_NONE; + GLenum store_format = internal_format; }; -constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // 
B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - 
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // 
ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM +constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, 
GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, 
GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + 
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; +constexpr std::array ACCELERATED_FORMATS{ + GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, + GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, + GL_RG16UI, GL_RG8UI, GL_R32UI, GL_R16UI, GL_R8UI, GL_RGBA32I, + GL_RGBA16I, GL_RGBA8I, GL_RG32I, GL_RG16I, GL_RG8I, GL_R32I, + GL_R16I, GL_R8I, GL_RGBA16, GL_RGB10_A2, GL_RGBA8, GL_RG16, + GL_RG8, GL_R16, GL_R8, GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM, + GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, +}; + const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; + ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; } -GLenum GetTextureTarget(const SurfaceTarget& target) { - switch (target) { - case SurfaceTarget::TextureBuffer: +GLenum ImageTarget(const VideoCommon::ImageInfo& info) { + switch (info.type) { + case ImageType::e1D: + return GL_TEXTURE_1D_ARRAY; + case ImageType::e2D: + if (info.num_samples > 1) { + return GL_TEXTURE_2D_MULTISAMPLE_ARRAY; + } + return 
GL_TEXTURE_2D_ARRAY; + case ImageType::e3D: + return GL_TEXTURE_3D; + case ImageType::Linear: + return GL_TEXTURE_2D_ARRAY; + case ImageType::Buffer: return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture1D: + } + UNREACHABLE_MSG("Invalid image type={}", info.type); + return GL_NONE; +} + +GLenum ImageTarget(ImageViewType type, int num_samples = 1) { + const bool is_multisampled = num_samples > 1; + switch (type) { + case ImageViewType::e1D: return GL_TEXTURE_1D; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: + case ImageViewType::e2D: + return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + case ImageViewType::Cube: + return GL_TEXTURE_CUBE_MAP; + case ImageViewType::e3D: return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: + case ImageViewType::e1DArray: return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: + case ImageViewType::e2DArray: + return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + case ImageViewType::CubeArray: return GL_TEXTURE_CUBE_MAP_ARRAY; + case ImageViewType::Rect: + return GL_TEXTURE_RECTANGLE; + case ImageViewType::Buffer: + return GL_TEXTURE_BUFFER; } - UNREACHABLE(); - return {}; + UNREACHABLE_MSG("Invalid image view type={}", type); + return GL_NONE; } -GLint GetSwizzleSource(SwizzleSource source) { +GLenum TextureMode(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + +GLint Swizzle(SwizzleSource source) { switch (source) { case SwizzleSource::Zero: return GL_ZERO; @@ -184,531 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) { case SwizzleSource::OneFloat: return GL_ONE; } - UNREACHABLE(); + UNREACHABLE_MSG("Invalid swizzle source={}", source); return GL_NONE; } -GLenum GetComponent(PixelFormat format, bool is_first) { - switch (format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; +GLenum AttachmentType(PixelFormat format) { + switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; default: - UNREACHABLE(); - return GL_DEPTH_COMPONENT; + UNIMPLEMENTED_MSG("Unimplemented type={}", type); + return GL_NONE; } } -void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { - if (params.IsBuffer()) { - return; +[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) { + if (!device.HasASTC() && IsPixelFormatASTC(format)) { + return true; } - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); - if (params.num_levels == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); + switch (format) { + case PixelFormat::BC4_UNORM: + case PixelFormat::BC5_UNORM: + return type == ImageType::e3D; + default: + break; } + return 
false; } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, - OGLBuffer& texture_buffer) { - OGLTexture texture; - texture.Create(target); +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; + } +} - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), - nullptr, GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); +void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) { + switch (format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + TextureMode(format, swizzle[0] == SwizzleSource::R)); + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height); + default: break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, - params.height, params.depth); + } + std::array<GLint, 4> gl_swizzle; + std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle); + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); +} + +[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, + const 
VideoCommon::ImageInfo& info) { + // Disable accelerated uploads for now as they don't implement swizzled uploads + return false; + switch (info.type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: break; default: - UNREACHABLE(); + return false; + } + const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const auto& format_info = runtime.FormatInfo(info.type, internal_format); + if (format_info.is_compressed) { + return false; + } + if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + return false; } + if (format_info.compatibility_by_size) { + return true; + } + const GLenum store_format = StoreFormat(BytesPerBlock(info.format)); + const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class; + return format_info.compatibility_class == store_class; +} - ApplyTextureDefaults(params, texture.handle); +[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, + VideoCommon::SubresourceLayers subresource, GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(offset.y), + .z = static_cast<GLint>(subresource.base_layer), + }; + case GL_TEXTURE_3D: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(offset.y), + .z = static_cast<GLint>(offset.z), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0}; + } +} - return texture; +[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent, + VideoCommon::SubresourceLayers dst_subresource, + GLenum target) { + switch (target) { + case GL_TEXTURE_2D_ARRAY: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + return CopyRegion{ + .width = 
static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(extent.height), + .depth = static_cast<GLsizei>(dst_subresource.num_layers), + }; + case GL_TEXTURE_3D: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(extent.height), + .depth = static_cast<GLsizei>(extent.depth), + }; + default: + UNIMPLEMENTED_MSG("Unimplemented copy target={}", target); + return CopyRegion{.width = 0, .height = 0, .depth = 0}; + } } -constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | - (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { + if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + const GLuint texture = image_view->DefaultHandle(); + glNamedFramebufferTexture(fbo, attachment, texture, 0); + return; + } + const GLuint texture = image_view->Handle(ImageViewType::e3D); + if (image_view->range.extent.layers > 1) { + // TODO: OpenGL doesn't support rendering to a fixed number of slices + glNamedFramebufferTexture(fbo, attachment, texture, 0); + } else { + const u32 slice = image_view->range.base.layer; + glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice); + } } } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) { - if (is_converted) { - internal_format = params.srgb_conversion ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; - format = GL_RGBA; - type = GL_UNSIGNED_BYTE; - } else { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format = tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = params.IsCompressed(); - } - target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format, texture_buffer); - DecorateSurfaceName(); +ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) + : span(map, size), sync{sync_}, handle{handle_} {} - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; +ImageBufferMap::~ImageBufferMap() { + if (sync) { + sync->Create(); } - - main_view = - CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); } -CachedSurface::~CachedSurface() = default; +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, + StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; + for (size_t i = 0; i < TARGETS.size(); ++i) { + const GLenum target = TARGETS[i]; + for (const FormatTuple& tuple : FORMAT_TABLE) { + const GLenum format = tuple.internal_format; + GLint compat_class; + GLint compat_type; + GLint is_compressed; + glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class); + glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1, + &compat_type); + glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed); + const FormatProperties properties{ + .compatibility_class = static_cast<GLenum>(compat_class), + .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE, + .is_compressed = is_compressed == GL_TRUE, + }; + format_properties[i].emplace(format, 
properties); + } + } + null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); + null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); + null_image_3d.Create(GL_TEXTURE_3D); + null_image_rect.Create(GL_TEXTURE_RECTANGLE); + glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); + glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); + glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); + glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); + + std::array<GLuint, 4> new_handles; + glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); + null_image_view_1d.handle = new_handles[0]; + null_image_view_2d.handle = new_handles[1]; + null_image_view_2d_array.handle = new_handles[2]; + null_image_view_cube.handle = new_handles[3]; + glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1, + 0, 1); + glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0, + 1, 0, 1); + glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY, + null_image_cube_array.handle, GL_R8, 0, 1, 0, 1); + glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, + GL_R8, 0, 1, 0, 6); + const std::array texture_handles{ + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, + null_image_view_2d_array.handle, null_image_view_cube.handle, + }; + for (const GLuint handle : texture_handles) { + static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); + } + const auto set_view = [this](ImageViewType type, GLuint handle) { + if (device.HasDebuggingToolAttached()) { + const std::string name = fmt::format("NullImage {}", type); + glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); + } + 
null_image_views[static_cast<size_t>(type)] = handle; + }; + set_view(ImageViewType::e1D, null_image_view_1d.handle); + set_view(ImageViewType::e2D, null_image_view_2d.handle); + set_view(ImageViewType::Cube, null_image_view_cube.handle); + set_view(ImageViewType::e3D, null_image_3d.handle); + set_view(ImageViewType::e1DArray, null_image_1d_array.handle); + set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); + set_view(ImageViewType::CubeArray, null_image_cube_array.handle); + set_view(ImageViewType::Rect, null_image_rect.handle); +} -void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Download); +TextureCacheRuntime::~TextureCacheRuntime() = default; - if (params.IsBuffer()) { - glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), - staging_buffer.data()); - return; - } +void TextureCacheRuntime::Finish() { + glFinish(); +} - SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} - for (u32 level = 0; level < params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); +ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} - u8* const mip_data = staging_buffer.data() + mip_offset; - const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); - if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, size, mip_data); - } else { - glGetTextureImage(texture.handle, level, format, type, size, mip_data); - } +void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, + 
std::span<const ImageCopy> copies) { + const GLuint dst_name = dst_image.Handle(); + const GLuint src_name = src_image.Handle(); + const GLenum dst_target = ImageTarget(dst_image.info); + const GLenum src_target = ImageTarget(src_image.info); + for (const ImageCopy& copy : copies) { + const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); + const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); + const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); + glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y, + src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x, + dst_origin.y, dst_origin.z, region.width, region.height, region.depth); } } -void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { - MICROPROFILE_SCOPE(OpenGL_Texture_Upload); - SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.emulated_levels; ++level) { - UploadTextureMipmap(level, staging_buffer); +bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + return false; } + return true; } -void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - const u8* buffer{staging_buffer.data() + mip_offset}; - if (is_compressed) { - const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D(texture.handle, level, 0, 0, - static_cast<GLsizei>(params.GetMipWidth(level)), - 
static_cast<GLsizei>(params.GetMipHeight(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, - static_cast<GLsizei>(params.GetMipWidth(level)), - static_cast<GLsizei>(params.GetMipHeight(level)), - static_cast<GLsizei>(params.GetMipDepth(level)), - internal_format, image_size, buffer); - break; - case SurfaceTarget::TextureCubemap: { - const std::size_t layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.depth; ++face) { - glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), - static_cast<GLsizei>(params.GetMipWidth(level)), - static_cast<GLsizei>(params.GetMipHeight(level)), 1, - internal_format, static_cast<GLsizei>(layer_size), - buffer); - buffer += layer_size; - } - break; - } - default: - UNREACHABLE(); - } +void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, + std::span<const ImageCopy> copies) { + if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { + ASSERT(src.info.type == ImageType::e3D); + util_shaders.CopyBC4(dst, src, copies); } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, - buffer); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(level == 0); - glNamedBufferSubData(texture_buffer.handle, 0, - params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), - params.GetMipHeight(level), format, type, buffer); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D( - texture.handle, level, 0, 0, 0, 
static_cast<GLsizei>(params.GetMipWidth(level)), - static_cast<GLsizei>(params.GetMipHeight(level)), - static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer); - break; - case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.depth; ++face) { - glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), - params.GetMipWidth(level), params.GetMipHeight(level), 1, - format, type, buffer); - buffer += params.GetHostLayerSize(level); - } - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } } -void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); -} +void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + state_tracker.NotifyScissor0(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { - LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); + ASSERT(dst->BufferBits() == src->BufferBits()); + + glEnable(GL_FRAMEBUFFER_SRGB); + glDisable(GL_RASTERIZER_DISCARD); + glDisablei(GL_SCISSOR_TEST, 0); + + const GLbitfield buffer_bits = dst->BufferBits(); + const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; + const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, + src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, + dst_region[1].x, dst_region[1].y, buffer_bits, + is_linear ? 
GL_LINEAR : GL_NEAREST); } -View CachedSurface::CreateView(const ViewParams& view_key) { - return CreateViewInner(view_key, false); +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, + size_t buffer_offset, + std::span<const SwizzleParameters> swizzles) { + switch (image.info.type) { + case ImageType::e2D: + return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + case ImageType::e3D: + return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + case ImageType::Linear: + return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + default: + UNREACHABLE(); + break; + } } -View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { - auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); - views[view_key] = view; - if (!is_proxy) - view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); - return view; +void TextureCacheRuntime::InsertUploadMemoryBarrier() { + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - bool is_proxy) - : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, - target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { - if (!is_proxy) { - main_view = CreateTextureView(); +FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const { + switch (type) { + case ImageType::e1D: + return format_properties[0].at(internal_format); + case ImageType::e2D: + case ImageType::Linear: + return format_properties[1].at(internal_format); + case ImageType::e3D: + return format_properties[2].at(internal_format); + default: + UNREACHABLE(); + return FormatProperties{}; } } -CachedSurfaceView::~CachedSurfaceView() = default; +TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, 
GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} -void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { - ASSERT(params.num_levels == 1); +TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - if (params.target == SurfaceTarget::Texture3D) { - if (params.num_layers > 1) { - ASSERT(params.base_layer == 0); - glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); - } else { - glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, - params.base_level, params.base_layer); - } - return; +ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, + bool insert_fence) { + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); +} + +size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional<size_t> index = FindBuffer(requested_size); index) { + return *index; } - if (params.num_layers > 1) { - UNIMPLEMENTED_IF(params.base_layer != 0); - glFramebufferTexture(fb_target, attachment, GetTexture(), 0); - return; + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + glNamedBufferStorage(buffer.handle, requested_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + + syncs.emplace_back(); + sizes.push_back(requested_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { + size_t smallest_buffer = std::numeric_limits<size_t>::max(); + std::optional<size_t> found; + const size_t num_buffers = sizes.size(); + 
for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + GLint status; + glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); + if (status != GL_SIGNALED) { + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; } + return found; +} - const GLenum view_target = surface.GetTarget(); - const GLuint texture = surface.GetTexture(); - switch (surface.GetSurfaceParams().target) { - case SurfaceTarget::Texture1D: - glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); +Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { + if (CanBeAccelerated(runtime, info)) { + flags |= ImageFlagBits::AcceleratedUpload; + } + if (IsConverted(runtime.device, info.format, info.type)) { + flags |= ImageFlagBits::Converted; + gl_internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; + gl_store_format = GL_RGBA8; + gl_format = GL_RGBA; + gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } else { + const auto& tuple = GetFormatTuple(info.format); + gl_internal_format = tuple.internal_format; + gl_store_format = tuple.store_format; + gl_format = tuple.format; + gl_type = tuple.type; + } + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers); break; - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers); break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, - params.base_layer); + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? 
+ const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_store_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + buffer.Create(); + glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); break; default: - UNIMPLEMENTED(); + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, + static_cast<GLsizei>(name.size()), name.data()); } } -GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - if (GetSurfaceParams().IsBuffer()) { - return GetTexture(); - } - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (current_swizzle == new_swizzle) { - return current_view; - } - current_swizzle = new_swizzle; +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - OGLTextureView& view = entry->second; - if (!is_cache_miss) { - current_view = view.handle; - return view.handle; - } - view = CreateTextureView(); - current_view = view.handle; + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - std::array swizzle{x_source, y_source, z_source, w_source}; + u32 current_row_length = std::numeric_limits<u32>::max(); + u32 current_image_height 
= std::numeric_limits<u32>::max(); - switch (const PixelFormat format = GetSurfaceParams().pixel_format) { - case PixelFormat::D24_UNORM_S8_UINT: - case PixelFormat::D32_FLOAT_S8_UINT: - case PixelFormat::S8_UINT_D24_UNORM: - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - GetComponent(format, x_source == SwizzleSource::R)); - - // Make sure we sample the first component - std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { - return value == SwizzleSource::G ? SwizzleSource::R : value; - }); - [[fallthrough]]; - default: { - const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), - GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; - glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - break; - } + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); + } + CopyBufferToImage(copy, buffer_offset); } - return view.handle; } -OGLTextureView CachedSurfaceView::CreateTextureView() const { - OGLTextureView texture_view; - texture_view.Create(); - - if (target == GL_TEXTURE_3D) { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, 0, 1); - } else { - glTextureView(texture_view.handle, target, surface.texture.handle, format, - params.base_level, params.num_levels, params.base_layer, params.num_layers); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferCopy> copies) { + for (const VideoCommon::BufferCopy& 
copy : copies) { + glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, + copy.dst_offset, copy.size); } - ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); - - return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const Device& device, - StateTracker& state_tracker_) - : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{ - state_tracker_} { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} +void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies) { + glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API -TextureCacheOpenGL::~TextureCacheOpenGL() = default; + glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); + glPixelStorei(GL_PACK_ALIGNMENT, 1); -Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); -} + u32 current_row_length = std::numeric_limits<u32>::max(); + u32 current_image_height = std::numeric_limits<u32>::max(); -void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; + for (const VideoCommon::BufferImageCopy& copy : copies) { + if (current_row_length != copy.buffer_row_length) { + current_row_length = copy.buffer_row_length; + glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); + } + if (current_image_height != copy.buffer_image_height) { + current_image_height = copy.buffer_image_height; + glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); + } 
+ CopyImageToBuffer(copy, buffer_offset); } - const auto src_handle = src_surface->GetTexture(); - const auto src_target = src_surface->GetTarget(); - const auto dst_handle = dst_surface->GetTexture(); - const auto dst_target = dst_surface->GetTarget(); - glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, - copy_params.source_y, copy_params.source_z, dst_handle, dst_target, - copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, - copy_params.dest_z, copy_params.width, copy_params.height, - copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) { - const auto& src_params{src_view->GetSurfaceParams()}; - const auto& dst_params{dst_view->GetSurfaceParams()}; - UNIMPLEMENTED_IF(src_params.depth != 1); - UNIMPLEMENTED_IF(dst_params.depth != 1); - - state_tracker.NotifyScissor0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); +void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset); - if (dst_params.srgb_conversion) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); + switch (info.type) { + case ImageType::e1D: + if (is_compressed) { + glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, + copy.image_subresource.num_layers, gl_internal_format, + static_cast<GLsizei>(copy.buffer_size), offset); + } else { + glTextureSubImage2D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_subresource.base_layer, + copy.image_extent.width, 
copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e2D: + case ImageType::Linear: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format, + static_cast<GLsizei>(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, + copy.image_subresource.base_layer, copy.image_extent.width, + copy.image_extent.height, copy.image_subresource.num_layers, + gl_format, gl_type, offset); + } + break; + case ImageType::e3D: + if (is_compressed) { + glCompressedTextureSubImage3D( + texture.handle, copy.image_subresource.base_level, copy.image_offset.x, + copy.image_offset.y, copy.image_offset.z, copy.image_extent.width, + copy.image_extent.height, copy.image_extent.depth, gl_internal_format, + static_cast<GLsizei>(copy.buffer_size), offset); + } else { + glTextureSubImage3D(texture.handle, copy.image_subresource.base_level, + copy.image_offset.x, copy.image_offset.y, copy.image_offset.z, + copy.image_extent.width, copy.image_extent.height, + copy.image_extent.depth, gl_format, gl_type, offset); + } + break; + default: + UNREACHABLE(); } - glDisable(GL_RASTERIZER_DISCARD); - glDisablei(GL_SCISSOR_TEST, 0); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); - - GLenum buffers = 0; - if (src_params.type == SurfaceType::ColorTexture) { - src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, 
GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +} - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { + const GLint x_offset = copy.image_offset.x; + const GLsizei width = copy.image_extent.width; - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); + const GLint level = copy.image_subresource.base_level; + const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size); + void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); + GLint y_offset = 0; + GLint z_offset = 0; + GLsizei height = 1; + GLsizei depth = 1; - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + switch (info.type) { + case ImageType::e1D: + y_offset = copy.image_subresource.base_layer; + height = copy.image_subresource.num_layers; + break; + case ImageType::e2D: + case ImageType::Linear: + y_offset = copy.image_offset.y; + z_offset = copy.image_subresource.base_layer; + height = copy.image_extent.height; + depth = copy.image_subresource.num_layers; + break; + case ImageType::e3D: + y_offset = 
copy.image_offset.y; + z_offset = copy.image_offset.z; + height = copy.image_extent.height; + depth = copy.image_extent.depth; + break; + default: + UNREACHABLE(); + } + // Compressed formats don't have a pixel format or type + const bool is_compressed = gl_format == GL_NONE; + if (is_compressed) { + glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, + height, depth, buffer_size, offset); + } else { + glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height, + depth, gl_format, gl_type, buffer_size, offset); } - - const Common::Rectangle<u32>& src_rect = copy_config.src_rect; - const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), - static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), - static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), - static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), - buffers, - is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { - MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { + const Device& device = runtime.device; + if (True(image.flags & ImageFlagBits::Converted)) { + internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; + } else { + internal_format = GetFormatTuple(format).internal_format; + } + VideoCommon::SubresourceRange flatten_range = info.range; + std::array<GLuint, 2> handles; + stored_views.reserve(2); - const auto source_format = GetFormatTuple(src_params.pixel_format); - const auto dest_format = GetFormatTuple(dst_params.pixel_format); + switch (info.type) { + case ImageViewType::e1DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e1D: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + break; + case ImageViewType::e2DArray: + flatten_range.extent.layers = 1; + [[fallthrough]]; + case ImageViewType::e2D: + if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { + // 2D and 2D array views on a 3D textures are used exclusively for render targets + ASSERT(info.range.extent.levels == 1); + const VideoCommon::SubresourceRange slice_range{ + .base = {.level = info.range.base.level, .layer = 0}, + .extent = {.levels = 1, .layers = 1}, + }; + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); + break; + } + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); + break; + case ImageViewType::e3D: + glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + break; + case ImageViewType::CubeArray: + flatten_range.extent.layers = 6; + [[fallthrough]]; + case ImageViewType::Cube: + glGenTextures(2, handles.data()); + SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); + SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + break; + case ImageViewType::Rect: + 
glGenTextures(1, handles.data()); + SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + break; + case ImageViewType::Buffer: + glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); + SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + break; + } + default_handle = Handle(info.type); +} - const std::size_t source_size = src_surface->GetHostSizeInBytes(); - const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} - const std::size_t buffer_size = std::max(source_size, dest_size); +void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, + GLuint handle, const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range) { + if (info.type == ImageViewType::Buffer) { + // TODO: Take offset from buffer cache + glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, + image.guest_size_bytes); + } else { + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); + } + } + if (device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(*this, view_type); + glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); + } + stored_views.emplace_back().handle = handle; + views[static_cast<size_t>(view_type)] = handle; +} - GLuint copy_pbo_handle = FetchPBO(buffer_size); +Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { + const GLenum compare_mode = config.depth_compare_enabled ? 
GL_COMPARE_REF_TO_TEXTURE : GL_NONE; + const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func); + const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None); + const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter); + const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter); + const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE; + + UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); + UNIMPLEMENTED_IF(config.float_coord_normalization != 0); + + sampler.Create(); + const GLuint handle = sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, 
"GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } +} - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. + GLuint handle; + glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + + GLsizei num_buffers = 0; + std::array<GLenum, NUM_RT> gl_draw_buffers; + gl_draw_buffers.fill(GL_NONE); + + for (size_t index = 0; index < color_buffers.size(); ++index) { + const ImageView* const image_view = color_buffers[index]; + if (!image_view) { + continue; + } + buffer_bits |= GL_COLOR_BUFFER_BIT; + gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index]; + num_buffers = static_cast<GLsizei>(index + 1); - if (src_surface->IsCompressed()) { - glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), - nullptr); - } else { - glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, - static_cast<GLsizei>(source_size), nullptr); + const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index); + AttachTexture(handle, attachment, image_view); } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - 
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + if (const ImageView* const image_view = depth_buffer; image_view) { + if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { + buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } else { + buffer_bits |= GL_DEPTH_BUFFER_BIT; + } + const GLenum attachment = AttachmentType(image_view->format); + AttachTexture(handle, attachment, image_view); + } - const GLsizei width = static_cast<GLsizei>(dst_params.width); - const GLsizei height = static_cast<GLsizei>(dst_params.height); - const GLsizei depth = static_cast<GLsizei>(dst_params.depth); - if (dst_surface->IsCompressed()) { - LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); - UNREACHABLE(); + if (num_buffers > 1) { + glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data()); + } else if (num_buffers > 0) { + glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]); } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(dst_params.target)); - UNREACHABLE(); - } + glNamedFramebufferDrawBuffer(handle, GL_NONE); } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glTextureBarrier(); -} + 
glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width); + glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height); + // TODO + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...); + // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...); -GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); - const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); - OGLBuffer& cp = copy_pbo_cache[l2]; - if (cp.handle == 0) { - const std::size_t ceil_size = 1ULL << l2; - cp.Create(); - cp.MakeStreamCopy(ceil_size); + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = VideoCommon::Name(key); + glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); } - return cp.handle; + framebuffer.handle = handle; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 7787134fc..04193e31e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -4,156 +4,247 @@ #pragma once -#include <array> -#include <functional> #include <memory> -#include <unordered_map> -#include <utility> -#include <vector> +#include <span> #include <glad/glad.h> -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; - -class CachedSurfaceView; -class CachedSurface; -class TextureCacheOpenGL; +class Device; +class 
ProgramManager; class StateTracker; -using Surface = std::shared_ptr<CachedSurface>; -using View = std::shared_ptr<CachedSurfaceView>; -using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; +class Framebuffer; +class Image; +class ImageView; +class Sampler; -class CachedSurface final : public VideoCommon::SurfaceBase<View> { - friend CachedSurfaceView; +using VideoCommon::ImageId; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +class ImageBufferMap { public: - explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported); - ~CachedSurface(); - - void UploadTexture(const std::vector<u8>& staging_buffer) override; - void DownloadTexture(std::vector<u8>& staging_buffer) override; + explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); + ~ImageBufferMap(); - GLenum GetTarget() const { - return target; + GLuint Handle() const noexcept { + return handle; } - GLuint GetTexture() const { - return texture.handle; + std::span<u8> Span() const noexcept { + return span; } - bool IsCompressed() const { - return is_compressed; +private: + std::span<u8> span; + OGLSync* sync; + GLuint handle; +}; + +struct FormatProperties { + GLenum compatibility_class; + bool compatibility_by_size; + bool is_compressed; +}; + +class TextureCacheRuntime { + friend Framebuffer; + friend Image; + friend ImageView; + friend Sampler; + +public: + explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, + StateTracker& state_tracker); + ~TextureCacheRuntime(); + + void Finish(); + + ImageBufferMap MapUploadBuffer(size_t size); + + ImageBufferMap MapDownloadBuffer(size_t size); + + void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + UNIMPLEMENTED(); } -protected: - void 
DecorateSurfaceName() override; + bool CanImageBeCopied(const Image& dst, const Image& src); + + void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + + void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); - View CreateView(const ViewParams& view_key) override; - View CreateViewInner(const ViewParams& view_key, bool is_proxy); + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::SwizzleParameters> swizzles); + + void InsertUploadMemoryBarrier(); + + FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; private: - void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); + struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); - GLenum internal_format{}; - GLenum format{}; - GLenum type{}; - bool is_compressed{}; - GLenum target{}; - u32 view_count{}; + ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - OGLTexture texture; - OGLBuffer texture_buffer; + size_t RequestBuffer(size_t requested_size); + + std::optional<size_t> FindBuffer(size_t requested_size); + + std::vector<OGLSync> syncs; + std::vector<OGLBuffer> buffers; + std::vector<u8*> maps; + std::vector<size_t> sizes; + GLenum storage_flags; + GLenum map_flags; + }; + + const Device& device; + StateTracker& state_tracker; + UtilShaders util_shaders; + + std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; + + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT}; + + OGLTexture null_image_1d_array; + OGLTexture null_image_cube_array; + OGLTexture 
null_image_3d; + OGLTexture null_image_rect; + OGLTextureView null_image_view_1d; + OGLTextureView null_image_view_2d; + OGLTextureView null_image_view_2d_array; + OGLTextureView null_image_view_cube; + + std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class Image : public VideoCommon::ImageBase { + friend ImageView; + public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); - ~CachedSurfaceView(); + explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); - /// @brief Attaches this texture view to the currently bound fb_target framebuffer - /// @param attachment Attachment to bind textures to - /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) - void Attach(GLenum attachment, GLenum fb_target) const; + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies); - GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferCopy> copies); - void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); + void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies); - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + GLuint Handle() const noexcept { + return texture.handle; } - GLuint GetTexture() const { - if (is_proxy) { - return surface.GetTexture(); - } - return main_view.handle; +private: + void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + + 
OGLTexture texture; + OGLTextureView store_view; + OGLBuffer buffer; + GLenum gl_internal_format = GL_NONE; + GLenum gl_store_format = GL_NONE; + GLenum gl_format = GL_NONE; + GLenum gl_type = GL_NONE; +}; + +class ImageView : public VideoCommon::ImageViewBase { + friend Image; + +public: + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + + [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { + return views[static_cast<size_t>(query_type)]; } - GLenum GetFormat() const { - return format; + [[nodiscard]] GLuint DefaultHandle() const noexcept { + return default_handle; } - const SurfaceParams& GetSurfaceParams() const { - return surface.GetSurfaceParams(); + [[nodiscard]] GLenum Format() const noexcept { + return internal_format; } private: - OGLTextureView CreateTextureView() const; + void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + const VideoCommon::ImageViewInfo& info, + VideoCommon::SubresourceRange view_range); + + std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; + std::vector<OGLTextureView> stored_views; + GLuint default_handle = 0; + GLenum internal_format = GL_NONE; +}; + +class ImageAlloc : public VideoCommon::ImageAllocBase {}; - CachedSurface& surface; - const GLenum format; - const GLenum target; - const bool is_proxy; +class Sampler { +public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - std::unordered_map<u32, OGLTextureView> view_cache; - OGLTextureView main_view; + GLuint Handle() const noexcept { + return sampler.handle; + } - // Use an invalid default so it always fails the comparison test - u32 current_swizzle = 0xffffffff; - GLuint current_view = 0; +private: + OGLSampler sampler; }; -class TextureCacheOpenGL final : public TextureCacheBase { +class Framebuffer { public: - explicit 
TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const Device& device, - StateTracker& state_tracker); - ~TextureCacheOpenGL(); - -protected: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; + explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; + [[nodiscard]] GLuint Handle() const noexcept { + return framebuffer.handle; + } - void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + [[nodiscard]] GLbitfield BufferBits() const noexcept { + return buffer_bits; + } private: - GLuint FetchPBO(std::size_t buffer_size); - - StateTracker& state_tracker; + OGLFramebuffer framebuffer; + GLbitfield buffer_bits = GL_NONE; +}; - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; - std::unordered_map<u32, OGLBuffer> copy_pbo_cache; +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = true; + static constexpr bool HAS_EMULATED_COPIES = true; + + using Runtime = OpenGL::TextureCacheRuntime; + using Image = OpenGL::Image; + using ImageAlloc = OpenGL::ImageAlloc; + using ImageView = OpenGL::ImageView; + using Sampler = OpenGL::Sampler; + using Framebuffer = OpenGL::Framebuffer; }; +using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index a8be2aa37..cbccfdeb4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -107,7 +107,7 
@@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { case Maxwell::IndexFormat::UnsignedInt: return GL_UNSIGNED_INT; } - UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format)); + UNREACHABLE_MSG("Invalid index_format={}", index_format); return {}; } @@ -144,7 +144,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { case Maxwell::PrimitiveTopology::Patches: return GL_PATCHES; } - UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology)); + UNREACHABLE_MSG("Invalid topology={}", topology); return GL_POINTS; } @@ -172,8 +172,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, } break; } - UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", - static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode)); + UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}", filter_mode, + mipmap_filter_mode); return GL_NEAREST; } @@ -204,7 +204,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return GL_MIRROR_CLAMP_TO_EDGE; } } - UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); + UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", wrap_mode); return GL_REPEAT; } @@ -227,7 +227,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { case Tegra::Texture::DepthCompareFunc::Always: return GL_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func)); + UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", func); return GL_GREATER; } @@ -249,7 +249,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return GL_MAX; } - UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation); return GL_FUNC_ADD; } @@ -313,7 +313,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor 
factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } - UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor); return GL_ZERO; } @@ -333,7 +333,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source); return GL_ZERO; } @@ -364,7 +364,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return GL_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return GL_ALWAYS; } @@ -395,7 +395,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { case Maxwell::StencilOp::DecrWrapOGL: return GL_DECR_WRAP; } - UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil); return GL_KEEP; } @@ -406,7 +406,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } - UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face cull={}", front_face); return GL_CCW; } @@ -419,7 +419,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } - UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face); return GL_BACK; } @@ -458,7 +458,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { case Maxwell::LogicOperation::Set: return GL_SET; } - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", 
static_cast<u32>(operation)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", operation); return GL_COPY; } @@ -471,10 +471,23 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { case Maxwell::PolygonMode::Fill: return GL_FILL; } - UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode)); + UNREACHABLE_MSG("Invalid polygon mode={}", polygon_mode); return GL_FILL; } +inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) { + switch (filter) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return GL_WEIGHTED_AVERAGE_ARB; + case Tegra::Texture::SamplerReduction::Min: + return GL_MIN; + case Tegra::Texture::SamplerReduction::Max: + return GL_MAX; + } + UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter)); + return GL_WEIGHTED_AVERAGE_ARB; +} + inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { // Enumeration order matches register order. We can convert it arithmetically. return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c869bb0e2..dd77a543c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -23,10 +23,10 @@ #include "core/telemetry_session.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/textures/decoders.h" namespace OpenGL { @@ -130,8 +130,8 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window_, Core::Memory::Memory& 
cpu_memory_, Tegra::GPU& gpu_, - std::unique_ptr<Core::Frontend::GraphicsContext> context) - : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_}, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} RendererOpenGL::~RendererOpenGL() = default; @@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - PrepareRendertarget(framebuffer); RenderScreenshot(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + state_tracker.BindFramebuffer(0); DrawScreen(emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; - const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; - rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); - // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - gl_framebuffer_data.data(), host_ptr); - + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, 
framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; + const std::span<const u8> input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); // Update existing texture @@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() { glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + // Generate presentation sampler + present_sampler.Create(); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + // Generate VBO handle for drawing vertex_buffer.Create(); @@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); @@ -275,9 +284,9 @@ void RendererOpenGL::AddTelemetryFields() { LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; - telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); - telemetry_session.AddField(user_system, "GPU_Model", gpu_model); - telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version); + telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor)); + 
telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model)); + telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } void RendererOpenGL::CreateRasterizer() { @@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const auto pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); GLint internal_format; @@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast<u32>(framebuffer.pixel_format)); + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast<u32>(framebuffer.pixel_format)); } texture.resource.Release(); @@ -348,7 +357,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } else { // Other transformations are unsupported LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", - static_cast<u32>(framebuffer_transform_flags)); + framebuffer_transform_flags); UNIMPLEMENTED(); } } @@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyPolygonModes(); state_tracker.NotifyViewport0(); state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask0(); + state_tracker.NotifyColorMask(0); state_tracker.NotifyBlend0(); state_tracker.NotifyFramebuffer(); state_tracker.NotifyFrontFace(); @@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, 0); + glBindSampler(0, 
present_sampler.handle); glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); @@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() { DrawScreen(layout); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, renderer_settings.screenshot_bits); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 9ef181f95..44e109794 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -57,10 +57,10 @@ struct ScreenInfo { class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::TelemetrySession& telemetry_session, - Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, - Tegra::GPU& gpu, - std::unique_ptr<Core::Frontend::GraphicsContext> context); + explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window_, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererOpenGL() override; bool Init() override; @@ -102,6 +102,7 @@ private: StateTracker state_tracker{gpu}; // OpenGL object IDs + OGLSampler present_sampler; OGLBuffer vertex_buffer; OGLProgram vertex_program; OGLProgram fragment_program; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp new file mode 100644 index 000000000..eb849cbf2 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -0,0 +1,224 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <bit> +#include <span> +#include <string_view> + +#include <glad/glad.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" +#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" +#include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/pitch_unswizzle_comp.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/util_shaders.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +using namespace HostShaders; + +using VideoCommon::Extent3D; +using VideoCommon::ImageCopy; +using VideoCommon::ImageType; +using VideoCommon::SwizzleParameters; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams; +using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; +using VideoCore::Surface::BytesPerBlock; + +namespace { + +OGLProgram MakeProgram(std::string_view source) { + OGLShader shader; + shader.Create(source, GL_COMPUTE_SHADER); + + OGLProgram program; + program.Create(true, false, shader.handle); + return program; +} + +} // Anonymous namespace + +UtilShaders::UtilShaders(ProgramManager& program_manager_) + : program_manager{program_manager_}, + block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), + block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), + pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { + const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); + swizzle_table_buffer.Create(); + 
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); +} + +UtilShaders::~UtilShaders() = default; + +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const SwizzleParameters> swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.layer_stride); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); + } + 
program_manager.RestoreGuestCompute(); +} + +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const SwizzleParameters> swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; + + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; + static constexpr GLuint BINDING_INPUT_BUFFER = 1; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); + + const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth); + + const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info); + glUniform3uiv(0, 1, params.origin.data()); + glUniform3iv(1, 1, params.destination.data()); + glUniform1ui(2, params.bytes_per_block_log2); + glUniform1ui(3, params.slice_size); + glUniform1ui(4, params.block_size); + glUniform1ui(5, params.x_shift); + glUniform1ui(6, params.block_height); + glUniform1ui(7, params.block_height_mask); + glUniform1ui(8, params.block_depth); + glUniform1ui(9, params.block_depth_mask); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, store_format); + glDispatchCompute(num_dispatches_x, 
num_dispatches_y, num_dispatches_z); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const SwizzleParameters> swizzles) { + static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; + static constexpr GLuint LOC_ORIGIN = 0; + static constexpr GLuint LOC_DESTINATION = 1; + static constexpr GLuint LOC_BYTES_PER_BLOCK = 2; + static constexpr GLuint LOC_PITCH = 3; + + const u32 bytes_per_block = BytesPerBlock(image.info.format); + const GLenum format = StoreFormat(bytes_per_block); + const u32 pitch = image.info.pitch; + + UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), + "Non-power of two images are not implemented"); + + program_manager.BindHostCompute(pitch_unswizzle_program.handle); + glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glUniform2ui(LOC_ORIGIN, 0, 0); + glUniform2i(LOC_DESTINATION, 0, 0); + glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); + glUniform1ui(LOC_PITCH, pitch); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + for (const SwizzleParameters& swizzle : swizzles) { + const Extent3D num_tiles = swizzle.num_tiles; + const size_t input_offset = swizzle.buffer_offset + buffer_offset; + + const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); + const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); + + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), + input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); + } + program_manager.RestoreGuestCompute(); +} + +void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { + static constexpr GLuint 
BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr GLuint LOC_SRC_OFFSET = 0; + static constexpr GLuint LOC_DST_OFFSET = 1; + + program_manager.BindHostCompute(copy_bc4_program.handle); + + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_subresource.base_layer == 0); + ASSERT(copy.src_subresource.num_layers == 1); + ASSERT(copy.dst_subresource.base_layer == 0); + ASSERT(copy.dst_subresource.num_layers == 1); + + glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); + glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, + GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); +} + +GLenum StoreFormat(u32 bytes_per_block) { + switch (bytes_per_block) { + case 1: + return GL_R8UI; + case 2: + return GL_R16UI; + case 4: + return GL_R32UI; + case 8: + return GL_RG32UI; + case 16: + return GL_RGBA32UI; + } + UNREACHABLE(); + return GL_R8UI; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h new file mode 100644 index 000000000..359997255 --- /dev/null +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <span> + +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +class Image; +class ImageBufferMap; +class ProgramManager; + +class UtilShaders { +public: + explicit UtilShaders(ProgramManager& program_manager); + ~UtilShaders(); + + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::SwizzleParameters> swizzles); + + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::SwizzleParameters> swizzles); + + void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::SwizzleParameters> swizzles); + + void CopyBC4(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); + +private: + ProgramManager& program_manager; + + OGLBuffer swizzle_table_buffer; + + OGLProgram block_linear_unswizzle_2d_program; + OGLProgram block_linear_unswizzle_3d_program; + OGLProgram pitch_unswizzle_program; + OGLProgram copy_bc4_program; +}; + +GLenum StoreFormat(u32 bytes_per_block); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp deleted file mode 100644 index 6d7bb16b2..000000000 --- a/src/video_core/renderer_opengl/utils.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <string> -#include <vector> - -#include <fmt/format.h> -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { - if (!GLAD_GL_KHR_debug) { - // We don't need to throw an error as this is just for debugging - return; - } - - std::string object_label; - if (extra_info.empty()) { - switch (identifier) { - case GL_TEXTURE: - object_label = fmt::format("Texture@0x{:016X}", addr); - break; - case GL_PROGRAM: - object_label = fmt::format("Shader@0x{:016X}", addr); - break; - default: - object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr); - break; - } - } else { - object_label = fmt::format("{}@0x{:016X}", extra_info, addr); - } - glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h deleted file mode 100644 index 9c09ee12c..000000000 --- a/src/video_core/renderer_opengl/utils.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <string_view> -#include <vector> -#include <glad/glad.h> -#include "common/common_types.h" - -namespace OpenGL { - -void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp new file mode 100644 index 000000000..87c8e5693 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -0,0 +1,624 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" +#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/renderer_vulkan/blit_image.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/surface.h" + +namespace Vulkan { + +using VideoCommon::ImageViewType; + +namespace { +struct PushConstants { + std::array<float, 2> tex_scale; + std::array<float, 2> tex_offset; +}; + +template <u32 binding> +inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + 
.pImmutableSamplers = nullptr, +}; +constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, + TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>, +}; +constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = 1, + .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, +}; +constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), + .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), +}; +constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = sizeof(PushConstants), +}; +constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 0, + .pVertexBindingDescriptions = nullptr, + .vertexAttributeDescriptionCount = 0, + .pVertexAttributeDescriptions = nullptr, +}; +constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + .primitiveRestartEnable = VK_FALSE, +}; +constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, +}; +constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, +}; +constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, +}; +constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()), + .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 0, + .pAttachments = nullptr, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | 
VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, +}; +constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + .front = VkStencilOpState{}, + .back = VkStencilOpState{}, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, +}; + +template <VkFilter filter> +inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = filter, + .minFilter = filter, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_TRUE, +}; + +constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( + const VkDescriptorSetLayout* set_layout) { + return VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = set_layout, + .pushConstantRangeCount 
= 1, + .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + }; +} + +constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage, + VkShaderModule shader) { + return VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = stage, + .module = shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; +} + +constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages( + VkShaderModule vertex_shader, VkShaderModule fragment_shader) { + return std::array{ + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader), + PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader), + }; +} + +void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view) { + const VkDescriptorImageInfo image_info{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkWriteDescriptorSet write_descriptor_set{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr); +} + +void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set, + VkSampler sampler, VkImageView image_view_0, + VkImageView image_view_1) { + const VkDescriptorImageInfo image_info_0{ + .sampler = sampler, + .imageView = image_view_0, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo image_info_1{ + .sampler = sampler, + .imageView = image_view_1, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const std::array write_descriptor_sets{ + 
VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_0, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info_1, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }, + }; + device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region) { + const VkOffset2D offset{ + .x = std::min(dst_region[0].x, dst_region[1].x), + .y = std::min(dst_region[0].y, dst_region[1].y), + }; + const VkExtent2D extent{ + .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), + .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), + }; + const VkViewport viewport{ + .x = static_cast<float>(offset.x), + .y = static_cast<float>(offset.y), + .width = static_cast<float>(extent.width), + .height = static_cast<float>(extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + // TODO: Support scissored blits + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); + const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, + }; + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + 
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); +} + +} // Anonymous namespace + +BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_, + StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, + one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( + ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( + TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), + one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), + two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(one_texture_set_layout.address()))), + two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( + PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), + blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), + convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), + linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), + nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { + if (device.IsExtShaderStencilExportSupported()) { + blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV); + } +} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation 
operation) { + const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = operation, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; + const VkPipeline pipeline = FindOrEmplacePipeline(key); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, + VkImageView src_depth_view, VkImageView src_stencil_view, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); + ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); + + const VkPipelineLayout layout = *two_textures_pipeline_layout; + const VkSampler sampler = *nearest_sampler; + const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([dst_region, src_region, pipeline, layout, sampler, 
src_depth_view, + src_stencil_view, descriptor_set, + &device = device](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, + src_stencil_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + + ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkSampler sampler = *nearest_sampler; + const VkDescriptorSet descriptor_set = 
one_texture_descriptor_allocator.Commit(); + const VkExtent2D extent{ + .width = src_image_view.size.width, + .height = src_image_view.size.height, + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, + &device = device](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast<float>(extent.width), + .height = static_cast<float>(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); + + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + +VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(blit_color_keys, key); + if (it != blit_color_keys.end()) { + return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; + } + blit_color_keys.push_back(key); + + const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag); + const VkPipelineColorBlendAttachmentState blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = 
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + // TODO: programmable blending + const VkPipelineColorBlendStateCreateInfo color_blend_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *blit_color_pipelines.back(); +} +/* Returns the depth/stencil blit pipeline, creating it on first use from the full_screen_vert and blit_depth_stencil_frag stages with two_textures_pipeline_layout. Once blit_depth_stencil_pipeline holds a handle it is returned as-is and `renderpass` is not consulted again. NOTE(review): unlike the color path, this cache is not keyed on the render pass - presumably every caller passes a compatible one; confirm. */ +VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { + if (blit_depth_stencil_pipeline) { + return *blit_depth_stencil_pipeline; + } + const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); + blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = 
&PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); + return *blit_depth_stencil_pipeline; +} +/* Lazily fills `pipeline`: does nothing when it already holds a handle; otherwise creates a graphics pipeline from the full_screen_vert and convert_depth_to_float_frag stages with one_texture_pipeline_layout, no depth-stencil state and PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, for `renderpass`. */ +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} +/* Lazily fills `pipeline`: does nothing when it already holds a handle; otherwise creates a graphics pipeline from the full_screen_vert and convert_float_to_depth_frag stages with one_texture_pipeline_layout, PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO and PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, for `renderpass`. */ +void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + if (pipeline) { + return; + } + 
const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h new file mode 100644 index 000000000..2c2790bf9 --- /dev/null +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <compare> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/texture_cache/types.h" + +namespace Vulkan { + +using VideoCommon::Offset2D; + +class VKDevice; +class VKScheduler; +class StateTracker; + +class Framebuffer; +class ImageView; +/* Key passed to FindOrEmplacePipeline: a render pass handle plus a Fermi2D operation. The defaulted operator<=> compares the members in declaration order (renderpass, then operation). */ +struct BlitImagePipelineKey { + constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default; + + VkRenderPass renderpass; + Tegra::Engines::Fermi2D::Operation operation; +}; +/* Owns the descriptor set layouts, descriptor allocators, pipeline layouts, shader modules, samplers and pipelines behind the public entry points below: BlitColor, BlitDepthStencil and the D32/R32 and D16/R16 conversions. NOTE(review): std::array and std::vector are used here but only <compare> is included directly - presumably they arrive through the project headers; confirm. */ +class BlitImageHelper { +public: + explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler, + StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + ~BlitImageHelper(); + + void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, + VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + +private: + void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + const ImageView& src_image_view); + + [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + + [[nodiscard]] VkPipeline 
BlitDepthStencilPipeline(VkRenderPass renderpass); + + void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + + const VKDevice& device; + VKScheduler& scheduler; + StateTracker& state_tracker; + + vk::DescriptorSetLayout one_texture_set_layout; + vk::DescriptorSetLayout two_textures_set_layout; + DescriptorAllocator one_texture_descriptor_allocator; + DescriptorAllocator two_textures_descriptor_allocator; + vk::PipelineLayout one_texture_pipeline_layout; + vk::PipelineLayout two_textures_pipeline_layout; + vk::ShaderModule full_screen_vert; + vk::ShaderModule blit_color_to_color_frag; + vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule convert_depth_to_float_frag; + vk::ShaderModule convert_float_to_depth_frag; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; +/* Color blit pipeline cache. NOTE(review): blit_color_keys and blit_color_pipelines look like parallel containers (entry i of one belonging to entry i of the other) - confirm in FindOrEmplacePipeline. The remaining vk::Pipeline members each hold a single pipeline. */ + std::vector<BlitImagePipelineKey> blit_color_keys; + std::vector<vk::Pipeline> blit_color_pipelines; + vk::Pipeline blit_depth_stencil_pipeline; + vk::Pipeline convert_d32_to_r32_pipeline; + vk::Pipeline convert_r32_to_d32_pipeline; + vk::Pipeline convert_d16_to_r16_pipeline; + vk::Pipeline convert_r16_to_d16_pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5ec43db11..67dd10500 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 
1 : 0); topology.Assign(regs.draw.topology); + msaa_mode.Assign(regs.multisample_mode); raw2 = 0; const auto test_func = diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c26b77790..7e95e6fce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -186,6 +186,7 @@ struct FixedPipelineState { BitField<19, 4, u32> logic_op; BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; + BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d22de1d81..4c988429f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -26,7 +26,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter) { case Tegra::Texture::TextureFilter::Linear: return VK_FILTER_LINEAR; } - UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter)); + UNREACHABLE_MSG("Invalid sampler filter={}", filter); return {}; } @@ -43,7 +43,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter case Tegra::Texture::TextureMipmapFilter::Linear: return VK_SAMPLER_MIPMAP_MODE_LINEAR; } - UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); + UNREACHABLE_MSG("Invalid sampler mipmap mode={}", mipmap_filter); return {}; } @@ -79,7 +79,7 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; default: - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", wrap_mode); return {}; } } @@ -103,8 +103,7 @@ VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_ case 
Tegra::Texture::DepthCompareFunc::Always: return VK_COMPARE_OP_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", - static_cast<u32>(depth_compare_func)); + UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", depth_compare_func); return {}; } @@ -123,7 +122,7 @@ struct FormatTuple { {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM - {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT @@ -164,7 +163,7 @@ struct FormatTuple { {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT {VK_FORMAT_UNDEFINED}, // R16G16_UINT - {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB @@ -228,25 +227,26 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; if (tuple.format == VK_FORMAT_UNDEFINED) { - UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", - static_cast<u32>(pixel_format)); + UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", pixel_format); return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { - tuple.format 
= VideoCore::Surface::IsPixelFormatSRGB(pixel_format) - ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 - : VK_FORMAT_A8B8G8R8_UNORM_PACK32; + const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format); + tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32; } const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; - VkFormatFeatureFlags usage; - if (format_type == FormatType::Buffer) { + VkFormatFeatureFlags usage{}; + switch (format_type) { + case FormatType::Buffer: usage = VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - } else { + break; + case FormatType::Linear: + case FormatType::Optimal: usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; if (attachable) { @@ -256,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo if (storage) { usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } + break; } return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } @@ -275,7 +276,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { case Tegra::Engines::ShaderType::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } - UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); + UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); return {}; } @@ -300,7 +301,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; default: - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + UNIMPLEMENTED_MSG("Unimplemented topology={}", topology); return {}; } } @@ -490,8 +491,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib } break; } - UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", 
static_cast<u32>(type), - static_cast<u32>(size)); + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", type, size); return {}; } @@ -522,7 +522,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { case Maxwell::ComparisonOp::AlwaysOld: return VK_COMPARE_OP_ALWAYS; } - UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } @@ -539,7 +539,7 @@ VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_forma case Maxwell::IndexFormat::UnsignedInt: return VK_INDEX_TYPE_UINT32; } - UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); + UNIMPLEMENTED_MSG("Unimplemented index_format={}", index_format); return {}; } @@ -570,7 +570,7 @@ VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) { case Maxwell::StencilOp::DecrWrapOGL: return VK_STENCIL_OP_DECREMENT_AND_WRAP; } - UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", stencil_op); return {}; } @@ -592,7 +592,7 @@ VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::MaxGL: return VK_BLEND_OP_MAX; } - UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", equation); return {}; } @@ -656,7 +656,7 @@ VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; } - UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", factor); return {}; } @@ -667,7 +667,7 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face) { case Maxwell::FrontFace::CounterClockWise: return VK_FRONT_FACE_COUNTER_CLOCKWISE; } - UNIMPLEMENTED_MSG("Unimplemented front face={}", 
static_cast<u32>(front_face)); + UNIMPLEMENTED_MSG("Unimplemented front face={}", front_face); return {}; } @@ -680,7 +680,7 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face) { case Maxwell::CullFace::FrontAndBack: return VK_CULL_MODE_FRONT_AND_BACK; } - UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); + UNIMPLEMENTED_MSG("Unimplemented cull face={}", cull_face); return {}; } @@ -700,7 +700,7 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { case Tegra::Texture::SwizzleSource::OneFloat: return VK_COMPONENT_SWIZZLE_ONE; } - UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", swizzle); return {}; } @@ -723,8 +723,21 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) case Maxwell::ViewportSwizzle::NegativeW: return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV; } - UNREACHABLE_MSG("Invalid swizzle={}", static_cast<int>(swizzle)); + UNREACHABLE_MSG("Invalid swizzle={}", swizzle); return {}; } +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) { /* Maps WeightedAverage, Min and Max to the matching VK_SAMPLER_REDUCTION_MODE_*_EXT value. Any other value reports through UNREACHABLE_MSG and falls back to weighted average. The enum is passed to the message uncast, as the other converters in this file now do. */ + switch (reduction) { + case Tegra::Texture::SamplerReduction::WeightedAverage: + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; + case Tegra::Texture::SamplerReduction::Min: + return VK_SAMPLER_REDUCTION_MODE_MIN_EXT; + case Tegra::Texture::SamplerReduction::Max: + return VK_SAMPLER_REDUCTION_MODE_MAX_EXT; + } + UNREACHABLE_MSG("Invalid sampler mode={}", reduction); + return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e213452f..1a90f192e 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource 
swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); +VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index a2173edd2..7f521cb9b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() { return library; } -std::pair<vk::Instance, u32> CreateInstance( - Common::DynamicLibrary& library, vk::InstanceDispatch& dld, - WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) { +std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library, + vk::InstanceDispatch& dld, WindowSystemType window_type, + bool enable_debug_utils, bool enable_layers) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); return {}; @@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance( if (window_type != Core::Frontend::WindowSystemType::Headless) { extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } - if (enable_layers) { + if (enable_debug_utils) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); @@ -243,8 +243,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, - std::unique_ptr<Core::Frontend::GraphicsContext> context) - : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, 
cpu_memory{cpu_memory_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { @@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { bool RendererVulkan::Init() { library = OpenVulkanLibrary(); std::tie(instance, instance_version) = CreateInstance( - library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); + library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug); if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } @@ -447,7 +447,8 @@ void RendererVulkan::Report() const { std::vector<std::string> RendererVulkan::EnumerateDevices() { vk::InstanceDispatch dld; Common::DynamicLibrary library = OpenVulkanLibrary(); - vk::Instance instance = CreateInstance(library, dld).first; + vk::Instance instance = + CreateInstance(library, dld, WindowSystemType::Headless, false, false).first; if (!instance) { return {}; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 1044ca124..74642fba4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -33,10 +33,9 @@ class VKDevice; class VKMemoryManager; class VKSwapchain; class VKScheduler; -class VKImage; struct VKScreenInfo { - VKImage* image{}; + VkImageView image_view{}; u32 width{}; u32 height{}; bool is_srgb{}; @@ -45,9 +44,9 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, - Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, - Tegra::GPU& gpu, - std::unique_ptr<Core::Frontend::GraphicsContext> context); + Core::Frontend::EmuWindow& emu_window, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererVulkan() override; bool Init() override; diff --git 
a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b5b60309e..d3a83f22f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -16,12 +16,12 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" -#include "video_core/morton.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -29,108 +29,12 @@ #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace Vulkan { namespace { -// Generated from the "shaders/" directory, read the instructions there. 
-constexpr u8 blit_vertex_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 
0xfd, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00}; - -constexpr u8 blit_fragment_code[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 
0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; - struct ScreenRectVertex { ScreenRectVertex() = default; explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} @@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { // clang-format on } -std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { using namespace VideoCore::Surface; - return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); + return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); } std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { @@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - VKImage* blit_image = use_accelerated ? 
screen_info.image : raw_images[image_index].get(); - - UpdateDescriptorSet(image_index, blit_image->GetPresentView()); + UpdateDescriptorSet(image_index, + use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); BufferData data; SetUniformData(data, framebuffer); SetVertexData(data, framebuffer); auto map = buffer_commit->Map(); - std::memcpy(map.GetAddress(), &data, sizeof(data)); + std::memcpy(map.Address(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer, image_index); - const auto pixel_format = - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); - rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); + const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); + const size_t size_bytes = GetSizeInBytes(framebuffer); + rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; - VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, - framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, - map.GetAddress() + image_offset, host_ptr); - - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + Tegra::Texture::UnswizzleTexture( + std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); const VkBufferImageCopy copy{ .bufferOffset = image_offset, @@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool }, }; scheduler.Record( - [buffer = *buffer, image = *blit_image->GetHandle(), 
copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); }); } map.Release(); - blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, @@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkClearValue clear_color{ .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, }; - - 
VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = size; - renderpass_bi.clearValueCount = 1; - renderpass_bi.pClearValues = &clear_color; - - VkViewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast<float>(size.width); - viewport.height = static_cast<float>(size.height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - - VkRect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = size; - + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast<float>(size.width), + .height = static_cast<float>(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.SetViewport(0, viewport); @@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) } void VKBlitScreen::CreateShaders() { - vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); - fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); + vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); + fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); } void VKBlitScreen::CreateSemaphores() { @@ -420,7 +349,7 @@ void 
VKBlitScreen::CreateRenderPass() { const VkAttachmentReference color_attachment_ref{ .attachment = 0, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .layout = VK_IMAGE_LAYOUT_GENERAL, }; const VkSubpassDescription subpass_description{ @@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); + raw_image_views.resize(image_count); raw_buffer_commits.resize(image_count); - const VkImageCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = GetFormat(framebuffer), - .extent = - { - .width = framebuffer.width, - .height = framebuffer.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - - for (std::size_t i = 0; i < image_count; ++i) { - raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); - raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = GetFormat(framebuffer), + .extent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + 
.queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false); + raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *raw_images[i], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = GetFormat(framebuffer), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } } @@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag const VkDescriptorImageInfo image_info{ .sampler = *sampler, .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkWriteDescriptorSet sampler_write{ diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 8f2839214..2ee374247 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -35,7 +35,6 @@ struct ScreenInfo; class RasterizerVulkan; class VKDevice; -class VKImage; class VKScheduler; class VKSwapchain; @@ -110,7 +109,8 @@ private: std::vector<u64> resource_ticks; std::vector<vk::Semaphore> semaphores; - std::vector<std::unique_ptr<VKImage>> raw_images; + std::vector<vk::Image> raw_images; + std::vector<vk::ImageView> raw_image_views; std::vector<VKMemoryCommit> raw_buffer_commits; u32 raw_width = 0; u32 raw_height = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d9d3da9ea..10d296c2f 100644 --- 
a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -31,20 +31,24 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; +constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { - return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); + return std::make_unique<VKStreamBuffer>(device, scheduler); } } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { +Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) + : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} { const VkBufferCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = static_cast<VkDeviceSize>(size), + .size = static_cast<VkDeviceSize>(size_), .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, @@ -57,69 +61,86 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu Buffer::~Buffer() = default; -void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(size, true); - std::memcpy(staging.commit->Map(size), data, size); +void 
Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); + std::memcpy(staging.commit->Map(data_size), data, data_size); scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer handle = Handle(); - scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - - const VkBufferMemoryBarrier barrier{ + scheduler.Record([staging = *staging.handle, handle, offset, data_size, + &device = device](vk::CommandBuffer cmdbuf) { + const VkBufferMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcAccessMask = + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | + VK_ACCESS_HOST_WRITE_BIT | + (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = handle, .offset = offset, - .size = size, + .size = data_size, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, - barrier, {}); - }); -} - -void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { - const auto& staging = staging_pool.GetUnusedBuffer(size, true); - scheduler.RequestOutsideRenderPassOperationContext(); - - const VkBuffer handle = Handle(); - scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - const VkBufferMemoryBarrier barrier{ + const VkBufferMemoryBarrier write_barrier{ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + 
.dstAccessMask = UPLOAD_ACCESS_BARRIERS, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = handle, .offset = offset, - .size = size, + .size = data_size, }; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, + write_barrier); }); +} + +void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(data_size, true); + scheduler.RequestOutsideRenderPassOperationContext(); + + const VkBuffer handle = Handle(); + scheduler.Record( + [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = data_size, + }; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); + cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); + }); scheduler.Finish(); - std::memcpy(data, staging.commit->Map(size), size); + std::memcpy(data, staging.commit->Map(data_size), data_size); } void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t 
dst_offset, - std::size_t size) { + std::size_t copy_size) { scheduler.RequestOutsideRenderPassOperationContext(); const VkBuffer dst_buffer = Handle(); scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, - size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); + copy_size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); std::array<VkBufferMemoryBarrier, 2> barriers; barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -130,7 +151,7 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[0].buffer = src_buffer; barriers[0].offset = src_offset; - barriers[0].size = size; + barriers[0].size = copy_size; barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barriers[1].pNext = nullptr; barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -139,19 +160,19 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[1].buffer = dst_buffer; barriers[1].offset = dst_offset; - barriers[1].size = size; + barriers[1].size = copy_size; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barriers, {}); }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, const VKDevice& device_, VKMemoryManager& memory_manager_, - VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory, - CreateStreamBuffer(device_, - scheduler_)}, + 
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, + VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, + cpu_memory_, stream_buffer_}, device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ staging_pool_} {} diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 7fb5ceedf..daf498222 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -22,15 +22,15 @@ class VKScheduler; class Buffer final : public VideoCommon::BufferBlock { public: explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); + VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); ~Buffer(); - void Upload(std::size_t offset, std::size_t size, const u8* data); + void Upload(std::size_t offset, std::size_t data_size, const u8* data); - void Download(std::size_t offset, std::size_t size, u8* data); + void Download(std::size_t offset, std::size_t data_size, u8* data); void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t size); + std::size_t copy_size); VkBuffer Handle() const { return *buffer.handle; @@ -41,6 +41,7 @@ public: } private: + const VKDevice& device; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -52,7 +53,8 @@ public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool); + VKScheduler& scheduler, VKStreamBuffer& stream_buffer, + VKStagingBufferPool& staging_pool); ~VKBufferCache(); BufferInfo GetEmptyBuffer(std::size_t size) override; diff --git 
a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp index 6339f4fe0..8f7d6410e 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -17,8 +17,8 @@ struct CommandPool::Pool { vk::CommandBuffers cmdbufs; }; -CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) - : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} +CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) + : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} CommandPool::~CommandPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h index b9cb3fb5d..62a7ce3f1 100644 --- a/src/video_core/renderer_vulkan/vk_command_pool.h +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -17,7 +17,7 @@ class VKDevice; class CommandPool final : public ResourcePool { public: - explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); + explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); ~CommandPool() override; void Allocate(size_t begin, size_t end) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 9637c6059..2c030e910 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,6 +10,9 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" +#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" +#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include 
"video_core/renderer_vulkan/vk_device.h" @@ -22,99 +25,6 @@ namespace Vulkan { namespace { -// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there. -constexpr u8 quad_array[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 
0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 
0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 
0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 
0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { return { .binding = 0, @@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { }; } -// Uint8 SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 uint8_pass[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, - 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, - 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, - 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, - 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 
0x00, 0x05, 0x00, - 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - -// Quad indexed SPIR-V module. Generated from the "shaders/" directory. 
-constexpr u8 QUAD_INDEXED_SPV[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, - 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, - 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 
0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 
0x71, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, - 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, - 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3d, 0x00, 
0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, - 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, - 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, - 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, - 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, - 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, - 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, - 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, - 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 
0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, - 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, - 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, - 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, - 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; - std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { return {{ { @@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { 
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, vk::Span<VkDescriptorSetLayoutBinding> bindings, vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, - const u8* code) { + vk::Span<VkPushConstantRange> push_constants, + std::span<const u32> code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .bindingCount = bindings.size(), .pBindings = bindings.data(), }); - layout = device.GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto .pushConstantRangeCount = push_constants.size(), .pPushConstantRanges = push_constants.data(), }); - if (!templates.empty()) { descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, @@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } - - auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); - std::memcpy(code_copy.get(), code, code_size); - module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = code_copy.get(), + .codeSize = static_cast<u32>(code.size_bytes()), + .pCode = code.data(), }); - pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -461,15 +162,15 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return set; } 
-QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), +QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} QuadArrayPass::~QuadArrayPass() = default; @@ -510,14 +211,13 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 return {*buffer.handle, 0}; } -Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) +Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), - uint8_pass), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + BuildInputOutputDescriptorUpdateTemplate(), {}, 
VULKAN_UINT8_COMP_SPV), + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} Uint8Pass::~Uint8Pass() = default; @@ -555,16 +255,15 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } -QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), +QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), - QUAD_INDEXED_SPV), - scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, - update_descriptor_queue{update_descriptor_queue} {} + BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + update_descriptor_queue{update_descriptor_queue_} {} QuadIndexedPass::~QuadIndexedPass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index acc94f27e..abdf61e2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -5,6 +5,7 @@ #pragma once #include <optional> +#include <span> #include <utility> #include "common/common_types.h" @@ -24,8 +25,7 @@ public: explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, vk::Span<VkDescriptorSetLayoutBinding> bindings, 
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, - const u8* code); + vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); ~VKComputePass(); protected: @@ -43,10 +43,10 @@ private: class QuadArrayPass final : public VKComputePass { public: - explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadArrayPass(); std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); @@ -59,9 +59,10 @@ private: class Uint8Pass final : public VKComputePass { public: - explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); @@ -74,10 +75,10 @@ private: class QuadIndexedPass final : public VKComputePass { public: - explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKStagingBufferPool& staging_buffer_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKStagingBufferPool& staging_buffer_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); std::pair<VkBuffer, 
u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9be72dc9b..62f44d6da 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -15,16 +15,16 @@ namespace Vulkan { -VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - const SPIRVShader& shader) - : device{device}, scheduler{scheduler}, entries{shader.entries}, +VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const SPIRVShader& shader_) + : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, + descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {} + shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} VKComputePipeline::~VKComputePipeline() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 6e2f22a4a..49e2113a2 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -17,10 +17,10 @@ class VKUpdateDescriptorQueue; class VKComputePipeline final { public: - explicit VKComputePipeline(const VKDevice& device, VKScheduler& 
scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - const SPIRVShader& shader); + explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const SPIRVShader& shader_); ~VKComputePipeline(); VkDescriptorSet CommitDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f34ed6735..370a63f74 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, + VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, @@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_BC2_SRGB_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + 
VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, VK_FORMAT_ASTC_8x6_UNORM_BLOCK, @@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) - : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, + : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); @@ -214,7 +230,7 @@ bool VKDevice::Create() { features2.features = { .robustBufferAccess = false, .fullDrawIndexUint32 = false, - .imageCubeArray = false, + .imageCubeArray = true, .independentBlend = true, .geometryShader = true, .tessellationShader = true, @@ -242,7 +258,7 @@ bool VKDevice::Create() { .shaderTessellationAndGeometryPointSize = false, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, + .shaderStorageImageMultisample = true, .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = false, @@ -268,7 +284,6 @@ bool VKDevice::Create() { .variableMultisampleRate = false, .inheritedQueries = false, }; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, .pNext = nullptr, @@ -380,6 +395,20 @@ bool VKDevice::Create() { LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + if (ext_robustness2) { + robustness2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, + .pNext = nullptr, + .robustBufferAccess2 = false, + .robustImageAccess2 = true, + .nullDescriptor = true, + }; + SetNext(next, robustness2); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -405,7 +434,14 @@ bool VKDevice::Create() { } CollectTelemetryParameters(); + CollectToolingInfo(); + if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { + LOG_WARNING( + Render_Vulkan, + "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); + ext_extended_dynamic_state = false; + } if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it // seems to cause stability issues @@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const { LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); // Wait for the log to flush and for Nsight Aftermath to dump the results - std::this_thread::sleep_for(std::chrono::seconds{3}); + std::this_thread::sleep_for(std::chrono::seconds{15}); } void VKDevice::SaveShader(const std::vector<u32>& spirv) const { @@ -491,14 +527,24 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; for (const auto format : astc_formats) { - const auto format_properties{physical.GetFormatProperties(format)}; - if (!(format_properties.optimalTilingFeatures & 
format_feature_usage)) { + const auto physical_format_properties{physical.GetFormatProperties(format)}; + if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) { return false; } } return true; } +bool VKDevice::TestDepthStencilBlits() const { + static constexpr VkFormatFeatureFlags required_features = + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const auto test_features = [](VkFormatProperties props) { + return (props.optimalTilingFeatures & required_features) == required_features; + }; + return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) && + test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT)); +} + bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { const auto features{physical.GetFeatures()}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), @@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), + std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), }; @@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { bool 
has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_robustness2{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { @@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() { test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); + test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); + test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); if (instance_version >= VK_API_VERSION_1_1) { test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); } @@ -644,8 +697,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { VkPhysicalDeviceFeatures2KHR features; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - VkPhysicalDeviceProperties2KHR properties; - properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + VkPhysicalDeviceProperties2KHR physical_properties; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; if (has_khr_shader_float16_int8) { VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; @@ 
-670,8 +723,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; subgroup_properties.pNext = nullptr; - properties.pNext = &subgroup_properties; - physical.GetProperties2KHR(properties); + physical_properties.pNext = &subgroup_properties; + physical.GetProperties2KHR(physical_properties); is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; @@ -695,8 +748,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; tfb_properties.pNext = nullptr; - properties.pNext = &tfb_properties; - physical.GetProperties2KHR(properties); + physical_properties.pNext = &tfb_properties; + physical.GetProperties2KHR(physical_properties); if (tfb_features.transformFeedback && tfb_features.geometryStreams && tfb_properties.maxTransformFeedbackStreams >= 4 && @@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() { } } + if (has_ext_robustness2) { + VkPhysicalDeviceRobustness2FeaturesEXT robustness2; + robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = nullptr; + features.pNext = &robustness2; + physical.GetFeatures2KHR(features); + if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { + extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + ext_robustness2 = true; + } + } + return extensions; } @@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) { void VKDevice::SetupFeatures() { const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; + is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } @@ -794,6 +860,32 @@ void 
VKDevice::CollectTelemetryParameters() { } } +void VKDevice::CollectToolingInfo() { + if (!ext_tooling_info) { + return; + } + const auto vkGetPhysicalDeviceToolPropertiesEXT = + reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>( + dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT")); + if (!vkGetPhysicalDeviceToolPropertiesEXT) { + return; + } + u32 tool_count = 0; + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) { + return; + } + std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count); + if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) { + return; + } + for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) { + const std::string_view name = tool.name; + LOG_INFO(Render_Vulkan, "{}", name); + has_renderdoc = has_renderdoc || name == "RenderDoc"; + has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; + } +} + std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 4286673d9..995dcfc0f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -157,6 +157,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true when blitting from and to depth stencil images is supported. + bool IsBlitDepthStencilSupported() const { + return is_blit_depth_stencil_supported; + } + /// Returns true if the device supports VK_NV_viewport_swizzle. bool IsNvViewportSwizzleSupported() const { return nv_viewport_swizzle; @@ -172,6 +177,11 @@ public: return ext_index_type_uint8; } + /// Returns true if the device supports VK_EXT_sampler_filter_minmax. 
+ bool IsExtSamplerFilterMinmaxSupported() const { + return ext_sampler_filter_minmax; + } + /// Returns true if the device supports VK_EXT_depth_range_unrestricted. bool IsExtDepthRangeUnrestrictedSupported() const { return ext_depth_range_unrestricted; @@ -197,6 +207,16 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_shader_stencil_export. + bool IsExtShaderStencilExportSupported() const { + return ext_shader_stencil_export; + } + + /// Returns true when a known debugging tool is attached. + bool HasDebuggingToolAttached() const { + return has_renderdoc || has_nsight_graphics; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -228,16 +248,23 @@ private: /// Collects telemetry information from the device. void CollectTelemetryParameters(); + /// Collects information about attached tools. + void CollectToolingInfo(); + /// Returns a list of queue initialization descriptors. std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; + /// Returns true if the device natively supports blitting depth stencil images. + bool TestDepthStencilBlits() const; + /// Returns true if a format is supported. bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; + VkInstance instance; ///< Vulkan instance. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. VkPhysicalDeviceProperties properties; ///< Device properties. @@ -253,15 +280,22 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 
+ bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 
+ bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Asynchronous Graphics Pipeline setting bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 5babbdd0b..774a12a53 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -14,12 +14,13 @@ namespace Vulkan { -InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} +InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, + bool is_stubbed_) + : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} -InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, - u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} +InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, + u32 payload_, bool is_stubbed_) + : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} InnerFence::~InnerFence() = default; @@ -71,11 +72,11 @@ bool InnerFence::IsEventSignalled() const { } } -VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, - VKBufferCache& buffer_cache, VKQueryCache& query_cache, +VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, + VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, const VKDevice& device_, VKScheduler& scheduler_) - : 
GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), + : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, device{device_}, scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 1547d6d30..c2869e8e3 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -8,6 +8,7 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { @@ -24,14 +25,13 @@ class VKBufferCache; class VKDevice; class VKQueryCache; class VKScheduler; -class VKTextureCache; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, - bool is_stubbed); - explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, - u32 payload, bool is_stubbed); + explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, + bool is_stubbed_); + explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, + u32 payload_, bool is_stubbed_); ~InnerFence(); void Queue(); @@ -51,14 +51,14 @@ private: using Fence = std::shared_ptr<InnerFence>; using GenericFenceManager = - VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; + VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, - Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, - VKBufferCache& buffer_cache, VKQueryCache& query_cache, - const VKDevice& 
device, VKScheduler& scheduler); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, + VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, + const VKDevice& device_, VKScheduler& scheduler_); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0e8f9c352..7979df3a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -15,7 +15,6 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { }; } +VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); + return 
VK_SAMPLE_COUNT_1_BIT; + } +} + } // Anonymous namespace -VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache, +VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, + VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, + const SPIRVProgram& program, u32 num_color_buffers) + : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( - program)}, - renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, - pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} + descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, + update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, + descriptor_template{CreateDescriptorUpdateTemplate(program)}, + modules(CreateShaderModules(program)), + pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -162,8 +183,8 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( .codeSize = 0, }; - std::vector<vk::ShaderModule> modules; - modules.reserve(Maxwell::MaxShaderStage); + std::vector<vk::ShaderModule> shader_modules; + shader_modules.reserve(Maxwell::MaxShaderStage); for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { const 
auto& stage = program[i]; if (!stage) { @@ -174,13 +195,14 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( ci.codeSize = stage->code.size() * sizeof(u32); ci.pCode = stage->code.data(); - modules.push_back(device.GetLogical().CreateShaderModule(ci)); + shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); } - return modules; + return shader_modules; } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, + VkRenderPass renderpass, + u32 num_color_buffers) const { const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; @@ -230,7 +252,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (!attribute.enabled) { continue; } - if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) { + if (!input_attributes.contains(static_cast<u32>(index))) { // Skip attributes not used by the vertex shaders. 
continue; } @@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; - std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), - UnpackViewportSwizzle); + std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa }; std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; - const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); - for (std::size_t index = 0; index < num_attachments; ++index) { + for (std::size_t index = 0; index < num_color_buffers; ++index) { static constexpr std::array COMPONENT_TABLE{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, @@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .flags = 0, .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = static_cast<u32>(num_attachments), + .attachmentCount = num_color_buffers, .pAttachments = cb_attachments.data(), .blendConstants = {}, }; @@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa stage_ci.pNext = &subgroup_size_ci; } } - - const VkGraphicsPipelineCreateInfo ci{ + return 
device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa .subpass = 0, .basePipelineHandle = nullptr, .basePipelineIndex = 0, - }; - return device.GetLogical().CreateGraphicsPipeline(ci); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 58aa35efd..214d06b4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -8,10 +8,10 @@ #include <optional> #include <vector> +#include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -20,8 +20,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; + VkRenderPass renderpass; std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; FixedPipelineState fixed_state; @@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey { } std::size_t Size() const noexcept { - return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); } }; static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); @@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); class VKDescriptorPool; class VKDevice; -class VKRenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; @@ -51,13 +49,12 @@ using SPIRVProgram 
= std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, + explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache, + VKUpdateDescriptorQueue& update_descriptor_queue_, const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program); + const SPIRVProgram& program, u32 num_color_buffers); ~VKGraphicsPipeline(); VkDescriptorSet CommitDescriptorSet(); @@ -70,10 +67,6 @@ public: return *layout; } - VkRenderPass GetRenderPass() const { - return renderpass; - } - GraphicsPipelineCacheKey GetCacheKey() const { return cache_key; } @@ -89,8 +82,8 @@ private: std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; - vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const; + vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, + u32 num_color_buffers) const; const VKDevice& device; VKScheduler& scheduler; @@ -104,7 +97,6 @@ private: vk::DescriptorUpdateTemplateKHR descriptor_template; std::vector<vk::ShaderModule> modules; - VkRenderPass renderpass; vk::Pipeline pipeline; }; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp deleted file mode 100644 index 1c418ea17..000000000 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <memory> -#include <vector> - -#include "common/assert.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_image.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci, - VkImageAspectFlags aspect_mask) - : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, - image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { - UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, - "Queue family tracking is not implemented"); - - image = device.GetLogical().CreateImage(image_ci); - - const u32 num_ranges = image_num_layers * image_num_levels; - barriers.resize(num_ranges); - subrange_states.resize(num_ranges, {{}, image_ci.initialLayout}); -} - -VKImage::~VKImage() = default; - -void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { - return; - } - - std::size_t cursor = 0; - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - auto& barrier = barriers[cursor]; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = state.access; - barrier.dstAccessMask = new_access; - barrier.oldLayout = state.layout; - barrier.newLayout = new_layout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = *image; - 
barrier.subresourceRange.aspectMask = aspect_mask; - barrier.subresourceRange.baseMipLevel = level; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = layer; - barrier.subresourceRange.layerCount = 1; - state.access = new_access; - state.layout = new_layout; - } - } - - scheduler.RequestOutsideRenderPassOperationContext(); - - scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { - // TODO(Rodrigo): Implement a way to use the latest stage across subresources. - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, - vk::Span(barriers.data(), cursor)); - }); -} - -bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept { - const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && - base_level == 0 && num_levels == image_num_levels; - if (!is_full_range) { - state_diverged = true; - } - - if (!state_diverged) { - auto& state = GetSubrangeState(0, 0); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - - for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { - for (u32 level_it = 0; level_it < num_levels; ++level_it) { - const u32 layer = base_layer + layer_it; - const u32 level = base_level + level_it; - auto& state = GetSubrangeState(layer, level); - if (state.access != new_access || state.layout != new_layout) { - return true; - } - } - } - return false; -} - -void VKImage::CreatePresentView() { - // Image type has to be 2D to be presented. 
- present_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); -} - -VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { - return subrange_states[static_cast<std::size_t>(layer * image_num_levels) + - static_cast<std::size_t>(level)]; -} - -} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h deleted file mode 100644 index b4d7229e5..000000000 --- a/src/video_core/renderer_vulkan/vk_image.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <vector> - -#include "common/common_types.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKScheduler; - -class VKImage { -public: - explicit VKImage(const VKDevice& device, VKScheduler& scheduler, - const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask); - ~VKImage(); - - /// Records in the passed command buffer an image transition and updates the state of the image. - void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout); - - /// Returns a view compatible with presentation, the image has to be 2D. - VkImageView GetPresentView() { - if (!present_view) { - CreatePresentView(); - } - return *present_view; - } - - /// Returns the Vulkan image handler. - const vk::Image& GetHandle() const { - return image; - } - - /// Returns the Vulkan format for this image. - VkFormat GetFormat() const { - return format; - } - - /// Returns the Vulkan aspect mask. - VkImageAspectFlags GetAspectMask() const { - return aspect_mask; - } - -private: - struct SubrangeState final { - VkAccessFlags access = 0; ///< Current access bits. - VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. - }; - - bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkAccessFlags new_access, VkImageLayout new_layout) noexcept; - - /// Creates a presentation view. 
- void CreatePresentView(); - - /// Returns the subrange state for a layer and layer. - SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; - - const VKDevice& device; ///< Device handler. - VKScheduler& scheduler; ///< Device scheduler. - - const VkFormat format; ///< Vulkan format. - const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. - const u32 image_num_layers; ///< Number of layers. - const u32 image_num_levels; ///< Number of mipmap levels. - - vk::Image image; ///< Image handle. - vk::ImageView present_view; ///< Image view compatible with presentation. - - std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. - std::vector<SubrangeState> subrange_states; ///< Current subrange state. - - bool state_diverged = false; ///< True when subresources mismatch in layout. -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 24c8960ac..56b24b70f 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -29,10 +29,10 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: - explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) - : device{device}, memory{std::move(memory)}, properties{properties}, - allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} + explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, + VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) + : device{device_}, memory{std::move(memory_)}, properties{properties_}, + allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { auto found = TryFindFreeSection(free_iterator, allocation_size, @@ -117,8 +117,8 @@ private: std::vector<const 
VKMemoryCommitImpl*> commits; }; -VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {} +VKMemoryManager::VKMemoryManager(const VKDevice& device_) + : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} VKMemoryManager::~VKMemoryManager() = default; @@ -207,16 +207,16 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi return {}; } -VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - const vk::DeviceMemory& memory, u64 begin, u64 end) - : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} +VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + const vk::DeviceMemory& memory_, u64 begin_, u64 end_) + : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - return MemoryMap{this, memory.Map(interval.first + offset_, size)}; + return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size)); } void VKMemoryCommitImpl::Unmap() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 1af88e3d4..318f8b43e 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -5,6 +5,7 @@ #pragma once #include <memory> +#include <span> #include <utility> #include <vector> #include "common/common_types.h" @@ -21,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; class VKMemoryManager final { public: - explicit VKMemoryManager(const VKDevice& device); + explicit VKMemoryManager(const VKDevice& device_); VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); @@ -58,8 +59,8 @@ 
class VKMemoryCommitImpl final { friend MemoryMap; public: - explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - const vk::DeviceMemory& memory, u64 begin, u64 end); + explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, + const vk::DeviceMemory& memory_, u64 begin_, u64 end_); ~VKMemoryCommitImpl(); /// Maps a memory region and returns a pointer to it. @@ -93,8 +94,8 @@ private: /// Holds ownership of a memory map. class MemoryMap final { public: - explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) - : commit{commit}, address{address} {} + explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_) + : commit{commit_}, span{span_} {} ~MemoryMap() { if (commit) { @@ -108,19 +109,24 @@ public: commit = nullptr; } + /// Returns a span to the memory map. + [[nodiscard]] std::span<u8> Span() const noexcept { + return span; + } + /// Returns the address of the memory map. - u8* GetAddress() const { - return address; + [[nodiscard]] u8* Address() const noexcept { + return span.data(); } /// Returns the address of the memory map; - operator u8*() const { - return address; + [[nodiscard]] operator u8*() const noexcept { + return span.data(); } private: const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. - u8* address{}; ///< Address to the mapped memory. + std::span<u8> span; ///< Address to the mapped memory. 
}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index df7e8c864..083796d05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -8,6 +8,7 @@ #include <vector> #include "common/bit_cast.h" +#include "common/cityhash.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -22,7 +23,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - VideoCommon::Shader::CompileDepth::FullDecompile}; + .depth = VideoCommon::Shader::CompileDepth::FullDecompile, + .disable_else_derivation = true, +}; constexpr std::size_t GetStageFromProgram(std::size_t program) { return program == 0 ? 
0 : program - 1; @@ -75,7 +77,7 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { case Maxwell::ShaderProgram::Fragment: return ShaderType::Fragment; default: - UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program)); + UNIMPLEMENTED_MSG("program={}", program); return ShaderType::Vertex; } } @@ -136,26 +138,24 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, - u32 main_offset) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), - shader_ir(program_code, main_offset, compiler_settings, registry), +Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, + GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) + : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), + shader_ir(program_code, main_offset_, compiler_settings, registry), entries(GenerateShaderEntries(shader_ir)) {} Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, +VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - VKRenderPassCache& renderpass_cache_) - : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_}, + VKUpdateDescriptorQueue& update_descriptor_queue_) + : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, 
device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, - update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ + update_descriptor_queue_} {} VKPipelineCache::~VKPipelineCache() = default; @@ -200,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { } VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { + const GraphicsPipelineCacheKey& key, u32 num_color_buffers, + VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { @@ -216,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, bindings, - program, key); + update_descriptor_queue, bindings, program, key, + num_color_buffers); } last_graphics_pipeline = pair->second.get(); return last_graphics_pipeline; @@ -230,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, renderpass_cache, key, - bindings, program); + update_descriptor_queue, key, bindings, + program, num_color_buffers); gpu.ShaderNotify().MarkShaderComplete(); } last_graphics_pipeline = entry.get(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e558e6658..fbaa8257c 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,7 +19,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/async_shaders.h" @@ -84,9 +83,9 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, - Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset); + explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, + Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, + VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); ~Shader(); GPUVAddr GetGpuAddr() const { @@ -124,13 +123,13 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache); + VKUpdateDescriptorQueue& update_descriptor_queue); ~VKPipelineCache() override; std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + u32 num_color_buffers, VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); @@ -153,7 +152,6 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - VKRenderPassCache& renderpass_cache; std::unique_ptr<Shader> null_shader; std::unique_ptr<Shader> null_kernel; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp 
b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ee2d871e3..038760de3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -66,15 +66,13 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; } -VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKScheduler& scheduler) - : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, - HostCounter>{rasterizer, maxwell3d, gpu_memory}, - device{device}, scheduler{scheduler}, query_pools{ - QueryPool{device, scheduler, - QueryType::SamplesPassed}, - } {} +VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + const VKDevice& device_, VKScheduler& scheduler_) + : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, + query_pools{ + QueryPool{device_, scheduler_, QueryType::SamplesPassed}, + } {} VKQueryCache::~VKQueryCache() { // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class @@ -95,12 +93,12 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { query_pools[static_cast<std::size_t>(type)].Reserve(query); } -HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, - QueryType type) - : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, - type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} { - const vk::Device* logical = &cache.Device().GetLogical(); - cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { +HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, + 
QueryType type_) + : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, + query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { + const vk::Device* logical = &cache_.Device().GetLogical(); + cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); @@ -119,18 +117,20 @@ u64 HostCounter::BlockingQuery() const { if (tick >= cache.Scheduler().CurrentTick()) { cache.Scheduler().Flush(); } + u64 data; - const VkResult result = cache.Device().GetLogical().GetQueryResults( + const VkResult query_result = cache.Device().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - switch (result) { + + switch (query_result) { case VK_SUCCESS: return data; case VK_ERROR_DEVICE_LOST: cache.Device().ReportLoss(); [[fallthrough]]; default: - throw vk::Exception(result); + throw vk::Exception(query_result); } } diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 2e57fb75d..837fe9ebf 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -53,9 +53,9 @@ private: class VKQueryCache final : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKScheduler& scheduler); + explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + const VKDevice& device_, VKScheduler& scheduler_); ~VKQueryCache(); std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); @@ -78,8 +78,8 @@ 
private: class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { public: - explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, - VideoCore::QueryType type); + explicit HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, + VideoCore::QueryType type_); ~HostCounter(); void EndQuery(); @@ -95,8 +95,8 @@ private: class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { public: - explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) - : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} + explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) + : CachedQueryBase{cpu_addr_, host_ptr_} {} }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e0fb8693f..04c5c859c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/settings.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -30,8 +31,6 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -39,10 +38,13 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include 
"video_core/renderer_vulkan/wrapper.h" #include "video_core/shader_cache.h" +#include "video_core/texture_cache/texture_cache.h" namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using VideoCommon::ImageViewId; +using VideoCommon::ImageViewType; MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); @@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192 namespace { -constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); +constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); -VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { +VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; @@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si return viewport; } -VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; if (src.enable) { @@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; - for (std::size_t i = 0; i < std::size(addresses); ++i) { + for (size_t i = 0; i < std::size(addresses); ++i) { addresses[i] = shaders[i] ? 
shaders[i]->GetGpuAddr() : 0; } return addresses; } -void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, - VkAccessFlags access) { - for (auto& [view, layout] : views) { - view->Transition(*layout, pipeline_stage, access); +struct TextureHandle { + constexpr TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); } -} + + u32 image; + u32 sampler; +}; template <typename Engine, typename Entry> -Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - std::size_t stage, std::size_t index = 0) { - const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); +TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, + size_t stage, size_t index = 0) { + const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); if constexpr (std::is_same_v<Entry, SamplerEntry>) { if (entry.is_separated) { const u32 buffer_1 = entry.buffer; const u32 buffer_2 = entry.secondary_buffer; const u32 offset_1 = entry.offset; const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); - return engine.GetTextureInfo(handle_1 | handle_2); + const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); + const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); + return TextureHandle(handle_1 | handle_2, via_header_index); } } if (entry.is_bindless) { - const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); - return engine.GetTextureInfo(tex_handle); - } - const auto& gpu_profile = engine.AccessGuestDriverProfile(); - const u32 entry_offset = static_cast<u32>(index * 
gpu_profile.GetTextureHandlerSize()); - const u32 offset = entry.offset + entry_offset; - if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(stage_type, offset); - } else { - return engine.GetTexture(offset); - } -} - -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { - if (!is_clear) { - return true; - } - // First we have to make sure all clear masks are enabled. - if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || - !regs.clear_buffers.A) { - return true; - } - // If scissors are disabled, the whole screen is cleared - if (!regs.clear_flags.scissor) { - return false; + const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); + return TextureHandle(raw, via_header_index); } - // Then we have to confirm scissor testing clears the whole image - const std::size_t index = regs.clear_buffers.RT; - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || - scissor.max_y < regs.rt[index].height; + const u32 buffer = engine.GetBoundBuffer(); + const u64 offset = (entry.offset + index) * sizeof(u32); + return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } -/// @brief Determine if an attachment to be updated has to preserve contents -/// @param is_clear True when a clear is being executed -/// @param regs 3D registers -/// @return True when the contents have to be preserved -bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { - // If we are not clearing, the contents have to be preserved - if (!is_clear) { - return true; - } - // For depth stencil clears we only have to confirm scissor test 
covers the whole image - if (!regs.clear_flags.scissor) { - return false; - } - // Make sure the clear cover the whole image - const auto& scissor = regs.scissor_test[0]; - return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || - scissor.max_y < regs.zeta_height; -} - -template <std::size_t N> +template <size_t N> std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { std::array<VkDeviceSize, N> expanded; std::copy(strides.begin(), strides.end(), expanded.begin()); return expanded; } +ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { + if (entry.is_buffer) { + return ImageViewType::e2D; + } + switch (entry.type) { + case Tegra::Shader::TextureType::Texture1D: + return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; + case Tegra::Shader::TextureType::Texture2D: + return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; + case Tegra::Shader::TextureType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::TextureType::TextureCube: + return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { + switch (entry.type) { + case Tegra::Shader::ImageType::Texture1D: + return ImageViewType::e1D; + case Tegra::Shader::ImageType::Texture1DArray: + return ImageViewType::e1DArray; + case Tegra::Shader::ImageType::Texture2D: + return ImageViewType::e2D; + case Tegra::Shader::ImageType::Texture2DArray: + return ImageViewType::e2DArray; + case Tegra::Shader::ImageType::Texture3D: + return ImageViewType::e3D; + case Tegra::Shader::ImageType::TextureBuffer: + return ImageViewType::Buffer; + } + UNREACHABLE(); + return ImageViewType::e2D; +} + +void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, + ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { + for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.samplers) { + for (size_t i = 0; i < entry.size; ++i) { + const VkSampler sampler = *sampler_ptr++; + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + update_descriptor_queue.AddSampledImage(handle, sampler); + } + } + for ([[maybe_unused]] const auto& entry : entries.storage_texels) { + const ImageViewId image_view_id = *image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + } + for (const auto& entry : entries.images) { + // TODO: Mark as modified + const ImageViewId image_view_id = 
*image_view_id_ptr++; + const ImageView& image_view = texture_cache.GetImageView(image_view_id); + const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); + update_descriptor_queue.AddImage(handle); + } +} + } // Anonymous namespace class BufferBindings final { @@ -290,7 +316,7 @@ public: private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { - std::size_t num_buffers = 0; + size_t num_buffers = 0; std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; @@ -303,7 +329,7 @@ private: VkIndexType type; } index; - template <std::size_t N> + template <size_t N> void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { if (device.IsExtExtendedDynamicStateSupported()) { if (index.buffer) { @@ -320,7 +346,7 @@ private: } } - template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> + template <size_t N, bool is_indexed, bool has_extended_dynamic_state> void BindStatic(VKScheduler& scheduler) const { static_assert(N <= Maxwell::NumVertexArrays); if constexpr (N == 0) { @@ -380,28 +406,31 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { } } -RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, +RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, - Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, + Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, const VKDevice& device_, VKMemoryManager& memory_manager_, StateTracker& state_tracker_, VKScheduler& scheduler_) - : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), - maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), - device(device_), memory_manager(memory_manager_), 
state_tracker(state_tracker_), - scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), - descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), - renderpass_cache(device), + : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, + gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, + screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_}, + state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), + staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler), + update_descriptor_queue(device, scheduler), + blit_image(device, scheduler, state_tracker, descriptor_pool), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), + texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image}, + texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue, renderpass_cache), - buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + descriptor_pool, update_descriptor_queue), + buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer, + staging_pool), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) { + 
wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { scheduler.SetQueryCache(query_cache); if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); @@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParameters draw_params = SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeGraphicsDescriptors(); + + texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); key.shaders = GetShaderAddresses(shaders); @@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(false); - SetupImageTransitions(texceptions, color_attachments, zeta_attachment); - - key.renderpass_params = GetRenderPassParams(texceptions); - key.padding = 0; + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + key.renderpass = framebuffer->RenderPass(); - auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + auto* const pipeline = + pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. 
return; } - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - - const auto renderpass = pipeline->GetRenderPass(); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - UpdateDynamicStates(); - buffer_bindings.Bind(device, scheduler); BeginTransformFeedback(); + scheduler.RequestRenderpass(framebuffer); + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); + UpdateDynamicStates(); + const auto pipeline_layout = pipeline->GetLayout(); const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { @@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() { return; } - sampled_views.clear(); - image_views.clear(); - query_cache.UpdateCounters(); const auto& regs = maxwell3d.regs; @@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(true); - DEBUG_ASSERT(texceptions.none()); - SetupImageTransitions(0, color_attachments, zeta_attachment); + auto lock = texture_cache.AcquireLock(); + texture_cache.UpdateRenderTargets(true); + const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); + const VkExtent2D render_area = framebuffer->RenderArea(); + scheduler.RequestRenderpass(framebuffer); - const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); - const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - - VkClearRect clear_rect; - clear_rect.baseArrayLayer = regs.clear_buffers.layer; - clear_rect.layerCount = 1; - clear_rect.rect = GetScissorState(regs, 0); - clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); - clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); + VkClearRect clear_rect{ + .rect = 
GetScissorState(regs, 0), + .baseArrayLayer = regs.clear_buffers.layer, + .layerCount = 1, + }; + if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) { + return; + } + clear_rect.rect.extent = VkExtent2D{ + .width = std::min(clear_rect.rect.extent.width, render_area.width), + .height = std::min(clear_rect.rect.extent.height, render_area.height), + }; if (use_color) { VkClearValue clear_value; @@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() { void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { MICROPROFILE_SCOPE(Vulkan_Compute); - update_descriptor_queue.Acquire(); - sampled_views.clear(); - image_views.clear(); query_cache.UpdateCounters(); @@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - buffer_cache.Map(CalculateComputeStreamBufferSize()); + image_view_indices.clear(); + sampler_handles.clear(); + + auto lock = texture_cache.AcquireLock(); + texture_cache.SynchronizeComputeDescriptors(); const auto& entries = pipeline.GetEntries(); - SetupComputeConstBuffers(entries); - SetupComputeGlobalBuffers(entries); SetupComputeUniformTexels(entries); SetupComputeTextures(entries); SetupComputeStorageTexels(entries); SetupComputeImages(entries); - buffer_cache.Unmap(); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); - TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT); - TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); + buffer_cache.Map(CalculateComputeStreamBufferSize()); + update_descriptor_queue.Acquire(); + + SetupComputeConstBuffers(entries); + SetupComputeGlobalBuffers(entries); + + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* 
sampler_ptr = sampler_handles.data(); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); + + buffer_cache.Unmap(); + + const VkPipeline pipeline_handle = pipeline.GetHandle(); + const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); + const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), - layout = pipeline.GetLayout(), - descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { + grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, + descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET, descriptor_set, {}); cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); @@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.FlushRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.DownloadMemory(addr, size); + } buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } @@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { if (!Settings::IsGPULevelHigh()) { return buffer_cache.MustFlushRegion(addr, size); } - return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); + return texture_cache.IsRegionGpuModified(addr, size) || + buffer_cache.MustFlushRegion(addr, size); } void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.InvalidateRegion(addr, size); + { + auto lock = texture_cache.AcquireLock(); + 
texture_cache.WriteMemory(addr, size); + } pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); @@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } - texture_cache.OnCPUWrite(addr, size); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.WriteMemory(addr, size); + } pipeline_cache.OnCPUWrite(addr, size); buffer_cache.OnCPUWrite(addr, size); } void RasterizerVulkan::SyncGuestHost() { - texture_cache.SyncGuestHost(); buffer_cache.SyncGuestHost(); pipeline_cache.SyncGuestHost(); } +void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { + { + auto lock = texture_cache.AcquireLock(); + texture_cache.UnmapMemory(addr, size); + } + buffer_cache.OnCPUWrite(addr, size); + pipeline_cache.OnCPUWrite(addr, size); +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); @@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() { }); } +void RasterizerVulkan::FragmentBarrier() { + // We already put barriers when a render pass finishes +} + +void RasterizerVulkan::TiledCacheBarrier() { + // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend +} + void RasterizerVulkan::FlushCommands() { if (draw_counter > 0) { draw_counter = 0; @@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() { void RasterizerVulkan::TickFrame() { draw_counter = 0; update_descriptor_queue.TickFrame(); + fence_manager.TickFrame(); buffer_cache.TickFrame(); staging_pool.TickFrame(); + { + auto lock = texture_cache.AcquireLock(); + texture_cache.TickFrame(); + } } -bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const 
Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { - texture_cache.DoFermiCopy(src, dst, copy_config); + auto lock = texture_cache.AcquireLock(); + texture_cache.BlitImage(dst, src, copy_config); return true; } @@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; - if (!surface) { + auto lock = texture_cache.AcquireLock(); + ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); + if (!image_view) { return false; } - // Verify that the cached surface is the same size and format as the requested framebuffer - const auto& params{surface->GetSurfaceParams()}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - - screen_info.image = &surface->GetImage(); - screen_info.width = params.width; - screen_info.height = params.height; - screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; + screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.width = image_view->size.width; + screen_info.height = image_view->size.height; + screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } @@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - - const auto& regs = maxwell3d.regs; - auto& dirty = maxwell3d.dirty.flags; - const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; - dirty[VideoCommon::Dirty::RenderTargets] = false; - - texture_cache.GuardRenderTargets(true); - - Texceptions texceptions; - for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - if (update_rendertargets) { - 
const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); - } - if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { - texceptions[rt] = true; - } - } - - if (update_rendertargets) { - const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); - zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); - } - if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { - texceptions[ZETA_TEXCEPTION_INDEX] = true; - } - - texture_cache.GuardRenderTargets(false); - - return texceptions; -} - -bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { - bool overlap = false; - for (auto& [view, layout] : sampled_views) { - if (!attachment.IsSameSurface(*view)) { - continue; - } - overlap = true; - *layout = VK_IMAGE_LAYOUT_GENERAL; - } - return overlap; -} - -std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( - VkRenderPass renderpass) { - FramebufferCacheKey key{ - .renderpass = renderpass, - .width = std::numeric_limits<u32>::max(), - .height = std::numeric_limits<u32>::max(), - .layers = std::numeric_limits<u32>::max(), - .views = {}, - }; - - const auto try_push = [&key](const View& view) { - if (!view) { - return false; - } - key.views.push_back(view->GetAttachment()); - key.width = std::min(key.width, view->GetWidth()); - key.height = std::min(key.height, view->GetHeight()); - key.layers = std::min(key.layers, view->GetNumLayers()); - return true; - }; - - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); - for (std::size_t index = 0; index < num_attachments; ++index) { - if (try_push(color_attachments[index])) { - texture_cache.MarkColorBufferInUse(index); - } - } - if (try_push(zeta_attachment)) { - texture_cache.MarkDepthBufferInUse(); - } - - const auto [fbentry, 
is_cache_miss] = framebuffer_cache.try_emplace(key); - auto& framebuffer = fbentry->second; - if (is_cache_miss) { - framebuffer = device.GetLogical().CreateFramebuffer({ - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .renderPass = key.renderpass, - .attachmentCount = static_cast<u32>(key.views.size()), - .pAttachments = key.views.data(), - .width = key.width, - .height = key.height, - .layers = key.layers, - }); - } - - return {*framebuffer, VkExtent2D{key.width, key.height}}; -} - RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, @@ -885,51 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt void RasterizerVulkan::SetupShaderDescriptors( const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { - texture_cache.GuardSamplers(true); - - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage + image_view_indices.clear(); + sampler_handles.clear(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { Shader* const shader = shaders[stage + 1]; if (!shader) { continue; } const auto& entries = shader->GetEntries(); - SetupGraphicsConstBuffers(entries, stage); - SetupGraphicsGlobalBuffers(entries, stage); SetupGraphicsUniformTexels(entries, stage); SetupGraphicsTextures(entries, stage); SetupGraphicsStorageTexels(entries, stage); SetupGraphicsImages(entries, stage); } - texture_cache.GuardSamplers(false); -} + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); -void RasterizerVulkan::SetupImageTransitions( - Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, - const View& zeta_attachment) { - TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); 
- TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); + update_descriptor_queue.Acquire(); - for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { - const auto color_attachment = color_attachments[rt]; - if (color_attachment == nullptr) { + ImageViewId* image_view_id_ptr = image_view_ids.data(); + VkSampler* sampler_ptr = sampler_handles.data(); + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + // Skip VertexA stage + Shader* const shader = shaders[stage + 1]; + if (!shader) { continue; } - const auto image_layout = - texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); - } - - if (zeta_attachment != nullptr) { - const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] - ? VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + const auto& entries = shader->GetEntries(); + SetupGraphicsConstBuffers(entries, stage); + SetupGraphicsGlobalBuffers(entries, stage); + PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, + sampler_ptr); } } @@ -1001,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() { void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const auto& regs = maxwell3d.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { continue; @@ -1010,7 +963,7 @@ void 
RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; ASSERT(end >= start); - const std::size_t size = end - start; + const size_t size = end - start; if (size == 0) { buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); continue; @@ -1071,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar } } -void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { @@ -1079,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s } } -void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); const auto& cbufs{maxwell3d.state.shader_stages[stage]}; @@ -1089,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, } } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void 
RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = + GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); + const auto& regs = maxwell3d.regs; + const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; - 
SetupImage(tic, entry); + const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); + image_view_indices.push_back(handle.image); } } @@ -1129,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); - Tegra::Engines::ConstBufferInfo buffer; - buffer.address = config.Address(); - buffer.size = config.size; - buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(entry, buffer); + const Tegra::Engines::ConstBufferInfo info{ + .address = config.Address(), + .size = config.size, + .enabled = mask[entry.GetIndex()], + }; + SetupConstBuffer(entry, info); } } @@ -1148,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupUniformTexels(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.samplers) { - for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); - SetupTexture(texture, entry); + for (size_t index = 0; index < entry.size; ++index) { + const TextureHandle handle = GetTextureInfo(kepler_compute, 
via_header_index, entry, + COMPUTE_SHADER_INDEX, index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); } } } void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupStorageTexel(image, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); + const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; - SetupImage(tic, entry); + const TextureHandle handle = + GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); + image_view_indices.push_back(handle.image); } } @@ -1187,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); return; } - // Align the size to avoid bad std140 interactions - const std::size_t size = - Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); + const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); ASSERT(size <= MaxConstbufferSize); - const auto info = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + const u64 alignment = device.GetUniformBufferAlignment(); + const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); 
update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } @@ -1207,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd // because Vulkan doesn't like empty buffers. // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the // default buffer. - static constexpr std::size_t dummy_size = 4; + static constexpr size_t dummy_size = 4; const auto info = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); return; @@ -1218,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd update_descriptor_queue.AddBuffer(info.handle, info.offset, size); } -void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, - const UniformTexelEntry& entry) { - const auto view = texture_cache.GetTextureSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - -void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, - const SamplerEntry& entry) { - auto view = texture_cache.GetTextureSurface(texture.tic, entry); - ASSERT(!view->IsBufferView()); - - const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); - const auto sampler = sampler_cache.GetSampler(texture.tsc); - update_descriptor_queue.AddSampledImage(sampler, image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - sampled_views.push_back(ImageView{std::move(view), image_layout}); -} - -void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, - const StorageTexelEntry& entry) { - const auto view = texture_cache.GetImageSurface(tic, entry); - ASSERT(view->IsBufferView()); - - update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); -} - 
-void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { - auto view = texture_cache.GetImageSurface(tic, entry); - - if (entry.is_written) { - view->MarkAsModified(texture_cache.Tick()); - } - - UNIMPLEMENTED_IF(tic.IsBuffer()); - - const VkImageView image_view = - view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); - update_descriptor_queue.AddImage(image_view); - - VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); - *image_layout = VK_IMAGE_LAYOUT_GENERAL; - image_views.push_back(ImageView{std::move(view), image_layout}); -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -1458,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } -std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { - std::size_t size = CalculateVertexArraysSize(); +size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { + size_t size = CalculateVertexArraysSize(); if (is_indexed) { size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); } @@ -1467,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) return size; } -std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { +size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { return Tegra::Engines::KeplerCompute::NumConstBuffers * (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); } -std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { +size_t RasterizerVulkan::CalculateVertexArraysSize() const { const auto& regs = maxwell3d.regs; - std::size_t size = 0; + size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { // This implementation assumes that all attributes are used in the shader. 
const GPUVAddr start{regs.vertex_array[index].StartAddress()}; @@ -1487,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { return size; } -std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * - static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); +size_t RasterizerVulkan::CalculateIndexBufferSize() const { + return static_cast<size_t>(maxwell3d.regs.index_array.count) * + static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } -std::size_t RasterizerVulkan::CalculateConstBufferSize( +size_t RasterizerVulkan::CalculateConstBufferSize( const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { if (entry.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing @@ -1503,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } } -RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = maxwell3d.regs; - const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); - - RenderPassParams params; - params.color_formats = {}; - std::size_t color_texceptions = 0; - - std::size_t index = 0; - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto& rendertarget = regs.rt[rt]; - if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { - continue; - } - params.color_formats[index] = static_cast<u8>(rendertarget.format); - color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; - ++index; - } - params.num_color_attachments = static_cast<u8>(index); - params.texceptions = static_cast<u8>(color_texceptions); - - params.zeta_format = regs.zeta_enable ? 
static_cast<u8>(regs.zeta.format) : 0; - params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; - return params; -} - VkBuffer RasterizerVulkan::DefaultBuffer() { if (default_buffer) { return *default_buffer; } - default_buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 237e51fa4..990f9e031 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -11,11 +11,11 @@ #include <vector> #include <boost/container/static_vector.hpp> -#include <boost/functional/hash.hpp> #include "common/common_types.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -24,10 +24,9 @@ #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -49,67 +48,16 @@ namespace Vulkan { struct VKScreenInfo; -using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; - -struct FramebufferCacheKey { - VkRenderPass renderpass{}; - u32 width = 0; - u32 height = 0; - u32 layers = 0; - ImageViewsPack 
views; - - std::size_t Hash() const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); - for (const auto& view : views) { - boost::hash_combine(hash, static_cast<VkImageView>(view)); - } - boost::hash_combine(hash, width); - boost::hash_combine(hash, height); - boost::hash_combine(hash, layers); - return hash; - } - - bool operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(renderpass, views, width, height, layers) == - std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); - } - - bool operator!=(const FramebufferCacheKey& rhs) const noexcept { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::FramebufferCacheKey> { - std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - class StateTracker; class BufferBindings; -struct ImageView { - View view; - VkImageLayout* layout = nullptr; -}; - class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - VKScreenInfo& screen_info, const VKDevice& device, - VKMemoryManager& memory_manager, StateTracker& state_tracker, - VKScheduler& scheduler); + explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + VKScreenInfo& screen_info_, const VKDevice& device_, + VKMemoryManager& memory_manager_, StateTracker& state_tracker_, + VKScheduler& scheduler_); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -123,15 +71,18 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void 
UnmapMemory(VAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitForIdle() override; + void FragmentBarrier() override; + void TiledCacheBarrier() override; void FlushCommands() override; void TickFrame() override; - bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst, + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; @@ -145,11 +96,17 @@ public: } /// Maximum supported size that a constbuffer can have in bytes. - static constexpr std::size_t MaxConstbufferSize = 0x10000; + static constexpr size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); private: + static constexpr size_t MAX_TEXTURES = 192; + static constexpr size_t MAX_IMAGES = 48; + static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; + + static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); + struct DrawParameters { void Draw(vk::CommandBuffer cmdbuf) const; @@ -160,20 +117,8 @@ private: bool is_indexed = 0; }; - using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; - - static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; - static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); - void FlushWork(); - /// @brief Updates the currently bound attachments - /// @param is_clear True when the framebuffer is updated as a clear - /// @return Bitfield of attachments being used as sampled textures - Texceptions UpdateAttachments(bool 
is_clear); - - std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); - /// Setups geometry buffers and state. DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, bool is_indexed, bool is_instanced); @@ -181,18 +126,12 @@ private: /// Setup descriptors in the graphics pipeline. void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); - void SetupImageTransitions(Texceptions texceptions, - const std::array<View, Maxwell::NumRenderTargets>& color_attachments, - const View& zeta_attachment); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); - void SetupVertexArrays(BufferBindings& buffer_bindings); void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -238,14 +177,6 @@ private: void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); - void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); - - void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); - - void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); - - void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -262,18 +193,16 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); - std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - - std::size_t CalculateComputeStreamBufferSize() const; + size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - std::size_t CalculateVertexArraysSize() const; + 
size_t CalculateComputeStreamBufferSize() const; - std::size_t CalculateIndexBufferSize() const; + size_t CalculateVertexArraysSize() const; - std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer) const; + size_t CalculateIndexBufferSize() const; - RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + size_t CalculateConstBufferSize(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) const; VkBuffer DefaultBuffer(); @@ -288,18 +217,19 @@ private: StateTracker& state_tracker; VKScheduler& scheduler; + VKStreamBuffer stream_buffer; VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; - VKRenderPassCache renderpass_cache; + BlitImageHelper blit_image; QuadArrayPass quad_array_pass; QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; - VKTextureCache texture_cache; + TextureCacheRuntime texture_cache_runtime; + TextureCache texture_cache; VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; - VKSamplerCache sampler_cache; VKQueryCache query_cache; VKFenceManager fence_manager; @@ -308,16 +238,11 @@ private: vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; - std::array<View, Maxwell::NumRenderTargets> color_attachments; - View zeta_attachment; - - std::vector<ImageView> sampled_views; - std::vector<ImageView> image_views; + boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; + std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; + boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles; u32 draw_counter = 0; - - // TODO(Rodrigo): Invalidate on image destruction - std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp deleted file mode 100644 
index 80284cf92..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <memory> -#include <vector> - -#include "common/cityhash.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -std::size_t RenderPassParams::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); - return static_cast<std::size_t>(hash); -} - -bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; -} - -VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} - -VKRenderPassCache::~VKRenderPassCache() = default; - -VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { - const auto [pair, is_cache_miss] = cache.try_emplace(params); - auto& entry = pair->second; - if (is_cache_miss) { - entry = CreateRenderPass(params); - } - return *entry; -} - -vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { - using namespace VideoCore::Surface; - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); - - std::vector<VkAttachmentDescription> descriptors; - descriptors.reserve(num_attachments); - - std::vector<VkAttachmentReference> color_references; - color_references.reserve(num_attachments); - - for (std::size_t rt = 0; rt < num_attachments; ++rt) { - const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); - const PixelFormat pixel_format = 
PixelFormatFromRenderTargetFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast<int>(pixel_format)); - - // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. - const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 - ? VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = color_layout, - .finalLayout = color_layout, - }); - - color_references.push_back({ - .attachment = static_cast<u32>(rt), - .layout = color_layout, - }); - } - - VkAttachmentReference zeta_attachment_ref; - const bool has_zeta = params.zeta_format != 0; - if (has_zeta) { - const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format); - const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); - const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); - ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast<int>(pixel_format)); - - const VkImageLayout zeta_layout = params.zeta_texception != 0 - ? 
VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - descriptors.push_back({ - .flags = 0, - .format = format.format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = zeta_layout, - .finalLayout = zeta_layout, - }); - - zeta_attachment_ref = { - .attachment = static_cast<u32>(num_attachments), - .layout = zeta_layout, - }; - } - - const VkSubpassDescription subpass_description{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast<u32>(color_references.size()), - .pColorAttachments = color_references.data(), - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - - VkAccessFlags access = 0; - VkPipelineStageFlags stage = 0; - if (!color_references.empty()) { - access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - } - - if (has_zeta) { - access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - } - - const VkSubpassDependency subpass_dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = stage, - .dstStageMask = stage, - .srcAccessMask = 0, - .dstAccessMask = access, - .dependencyFlags = 0, - }; - - return device.GetLogical().CreateRenderPass({ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast<u32>(descriptors.size()), - .pAttachments = descriptors.data(), - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount 
= 1, - .pDependencies = &subpass_dependency, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h deleted file mode 100644 index 8b0fec720..000000000 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <type_traits> -#include <unordered_map> - -#include <boost/container/static_vector.hpp> -#include <boost/functional/hash.hpp> - -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" - -namespace Vulkan { - -class VKDevice; - -struct RenderPassParams { - std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats; - u8 num_color_attachments; - u8 texceptions; - - u8 zeta_format; - u8 zeta_texception; - - std::size_t Hash() const noexcept; - - bool operator==(const RenderPassParams& rhs) const noexcept; - - bool operator!=(const RenderPassParams& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v<RenderPassParams>); -static_assert(std::is_trivially_copyable_v<RenderPassParams>); -static_assert(std::is_trivially_constructible_v<RenderPassParams>); - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::RenderPassParams> { - std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKRenderPassCache final { -public: - explicit VKRenderPassCache(const VKDevice& device); - ~VKRenderPassCache(); - - VkRenderPass GetRenderPass(const RenderPassParams& params); - -private: - vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; - - const VKDevice& device; - std::unordered_map<RenderPassParams, 
vk::RenderPass> cache; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp deleted file mode 100644 index b068888f9..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <unordered_map> - -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_sampler_cache.h" -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/textures/texture.h" - -using Tegra::Texture::TextureMipmapFilter; - -namespace Vulkan { - -namespace { - -VkBorderColor ConvertBorderColor(std::array<float, 4> color) { - // TODO(Rodrigo): Manage integer border colors - if (color == std::array<float, 4>{0, 0, 0, 0}) { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } else if (color == std::array<float, 4>{0, 0, 0, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else if (color == std::array<float, 4>{1, 1, 1, 1}) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } - if (color[0] + color[1] + color[2] > 1.35f) { - // If color elements are brighter than roughly 0.5 average, use white border - return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - } else if (color[3] > 0.5f) { - return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - } else { - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - } -} - -} // Anonymous namespace - -VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} - -VKSamplerCache::~VKSamplerCache() = default; - -vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); - const std::array color = tsc.GetBorderColor(); - - VkSamplerCustomBorderColorCreateInfoEXT border{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, - 
.pNext = nullptr, - .customBorderColor = {}, - .format = VK_FORMAT_UNDEFINED, - }; - std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - - return device.GetLogical().CreateSampler({ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = arbitrary_borders ? &border : nullptr, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.GetLodBias(), - .anisotropyEnable = - static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), - .maxAnisotropy = tsc.GetMaxAnisotropy(), - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), - .borderColor = - arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); -} - -VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { - return *sampler; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h deleted file mode 100644 index a33d1c0ee..000000000 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/sampler_cache.h" -#include "video_core/textures/texture.h" - -namespace Vulkan { - -class VKDevice; - -class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { -public: - explicit VKSamplerCache(const VKDevice& device); - ~VKSamplerCache(); - -protected: - vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - - VkSampler ToSamplerType(const vk::Sampler& sampler) const override; - -private: - const VKDevice& device; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1a483dc71..c104c6fe3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -16,6 +16,7 @@ #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() { AcquireNewChunk(); } -void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area) { - if (renderpass == state.renderpass && framebuffer == state.framebuffer && +void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) { + const VkRenderPass renderpass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && render_area.width == state.render_area.width && render_area.height == state.render_area.height) { return; } - const bool end_renderpass = state.renderpass != nullptr; + EndRenderPass(); state.renderpass = renderpass; - 
state.framebuffer = framebuffer; + state.framebuffer = framebuffer_handle; state.render_area = render_area; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, - .renderArea = - { - .offset = {.x = 0, .y = 0}, - .extent = render_area, - }, - .clearValueCount = 0, - .pClearValues = nullptr, - }; - - Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { - if (end_renderpass) { - cmdbuf.EndRenderPass(); - } + Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer_handle, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); }); + num_renderpass_images = framebuffer->NumImages(); + renderpass_images = framebuffer->Images(); + renderpass_image_ranges = framebuffer->ImageRanges(); } void VKScheduler::RequestOutsideRenderPassOperationContext() { @@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() { if (!state.renderpass) { return; } + Record([num_images = num_renderpass_images, images = renderpass_images, + ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { + std::array<VkImageMemoryBarrier, 9> barriers; + for (size_t i = 0; i < num_images; ++i) { + barriers[i] = VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange = ranges[i], + }; + } + cmdbuf.EndRenderPass(); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, + vk::Span(barriers.data(), num_images)); + }); state.renderpass = nullptr; - Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); + num_renderpass_images = 0; } void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 7be8a19f0..0a36c8fad 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,6 +17,7 @@ namespace Vulkan { class CommandPool; +class Framebuffer; class MasterSemaphore; class StateTracker; class VKDevice; @@ -52,8 +53,7 @@ public: void DispatchWork(); /// Requests to begin a renderpass. - void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, - VkExtent2D render_area); + void RequestRenderpass(const Framebuffer* framebuffer); /// Requests the current executino context to be able to execute operations only allowed outside /// of a renderpass. @@ -62,6 +62,9 @@ public: /// Binds a pipeline to the current execution context. void BindGraphicsPipeline(VkPipeline pipeline); + /// Invalidates current command buffer state except for render passes + void InvalidateState(); + /// Assigns the query cache. 
void SetQueryCache(VKQueryCache& query_cache_) { query_cache = &query_cache_; @@ -104,7 +107,7 @@ private: template <typename T> class TypedCommand final : public Command { public: - explicit TypedCommand(T&& command) : command{std::move(command)} {} + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} ~TypedCommand() override = default; TypedCommand(TypedCommand&&) = delete; @@ -170,8 +173,6 @@ private: void AllocateNewContext(); - void InvalidateState(); - void EndPendingOperations(); void EndRenderPass(); @@ -192,6 +193,11 @@ private: std::thread worker_thread; State state; + + u32 num_renderpass_images = 0; + std::array<VkImage, 9> renderpass_images{}; + std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; + Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index fed9ebecd..09d6f9f35 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -55,8 +55,8 @@ enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; class Expression final { public: - Expression(Id id, Type type) : id{id}, type{type} { - ASSERT(type != Type::Void); + Expression(Id id_, Type type_) : id{id_}, type{type_} { + ASSERT(type_ != Type::Void); } Expression() : type{Type::Void} {} @@ -102,7 +102,7 @@ struct GenericVaryingDescription { bool is_scalar = false; }; -spv::Dim GetSamplerDim(const Sampler& sampler) { +spv::Dim GetSamplerDim(const SamplerEntry& sampler) { ASSERT(!sampler.is_buffer); switch (sampler.type) { case Tegra::Shader::TextureType::Texture1D: @@ -114,12 +114,12 @@ spv::Dim GetSamplerDim(const Sampler& sampler) { case Tegra::Shader::TextureType::TextureCube: return spv::Dim::Cube; default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", 
static_cast<int>(sampler.type)); + UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); return spv::Dim::Dim2D; } } -std::pair<spv::Dim, bool> GetImageDim(const Image& image) { +std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { switch (image.type) { case Tegra::Shader::ImageType::Texture1D: return {spv::Dim::Dim1D, false}; @@ -134,7 +134,7 @@ std::pair<spv::Dim, bool> GetImageDim(const Image& image) { case Tegra::Shader::ImageType::Texture3D: return {spv::Dim::Dim3D, false}; default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(image.type)); + UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); return {spv::Dim::Dim2D, false}; } } @@ -281,12 +281,12 @@ u32 ShaderVersion(const VKDevice& device) { class SPIRVDecompiler final : public Sirit::Module { public: - explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, - const Registry& registry, const Specialization& specialization) - : Module(ShaderVersion(device)), device{device}, ir{ir}, stage{stage}, - header{ir.GetHeader()}, registry{registry}, specialization{specialization} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, + const Registry& registry_, const Specialization& specialization_) + : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, + header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { + if (stage_ != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); } AddCapability(spv::Capability::Shader); @@ -330,7 +330,7 @@ public: if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } - t_scalar_half = Name(TypeFloat(device.IsFloat16Supported() ? 16 : 32), "scalar_half"); + t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 
16 : 32), "scalar_half"); t_half = Name(TypeVector(t_scalar_half, 2), "half"); const Id main = Decompile(); @@ -980,7 +980,7 @@ private: return binding; } - void DeclareImage(const Image& image, u32& binding) { + void DeclareImage(const ImageEntry& image, u32& binding) { const auto [dim, arrayed] = GetImageDim(image); constexpr int depth = 0; constexpr bool ms = false; @@ -1088,9 +1088,9 @@ private: indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); } - const auto& output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = - std::any_of(output_attributes.begin(), output_attributes.end(), [](const auto& index) { + const auto& ir_output_attributes = ir.GetOutputAttributes(); + const bool declare_clip_distances = std::any_of( + ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { return index == Attribute::Index::ClipDistances0123 || index == Attribute::Index::ClipDistances4567; }); @@ -1254,7 +1254,7 @@ private: const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); return {OpLoad(GetTypeDefinition(type), pointer), type}; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); return {v_float_zero, Type::Float}; } @@ -1890,7 +1890,7 @@ private: case Tegra::Shader::TextureType::Texture3D: return 3; default: - UNREACHABLE_MSG("Invalid texture type={}", static_cast<int>(type)); + UNREACHABLE_MSG("Invalid texture type={}", type); return 2; } }(); @@ -2094,6 +2094,7 @@ private: return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); default: UNREACHABLE(); + return v_true; } } @@ -2125,8 +2126,7 @@ private: OpStore(z_pointer, depth); } if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [&](u32 reg) { - // TODO(Rodrigo): Replace with contains once C++20 releases + const auto SafeGetRegister = [this](u32 reg) { if (const auto it = registers.find(reg); it 
!= registers.end()) { return OpLoad(t_float, it->second); } @@ -2891,7 +2891,7 @@ private: class ExprDecompiler { public: - explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} Id operator()(const ExprAnd& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); @@ -2947,7 +2947,7 @@ private: class ASTDecompiler { public: - explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 110848922..ad91ad5de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -21,17 +21,17 @@ class VKDevice; namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::Sampler; -using SamplerEntry = VideoCommon::Shader::Sampler; -using StorageTexelEntry = VideoCommon::Shader::Image; -using ImageEntry = VideoCommon::Shader::Image; +using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; +using SamplerEntry = VideoCommon::Shader::SamplerEntry; +using StorageTexelEntry = VideoCommon::Shader::ImageEntry; +using ImageEntry = VideoCommon::Shader::ImageEntry; constexpr u32 DESCRIPTOR_SET = 0; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: - explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index) - : VideoCommon::Shader::ConstBuffer{entry}, index{index} {} + explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) + : ConstBuffer{entry_}, index{index_} {} constexpr u32 GetIndex() const { return index; @@ -43,8 +43,8 @@ private: class GlobalBufferEntry { public: - constexpr explicit 
GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {} + constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) + : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} constexpr u32 GetCbufIndex() const { return cbuf_index; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index c1a218d76..38a0be7f2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -13,18 +13,13 @@ namespace Vulkan { -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { - // Avoid undefined behavior by copying to a staging allocation - ASSERT(code_size % sizeof(u32) == 0); - const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); - std::memcpy(data.get(), code_data, code_size); - +vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .codeSize = code_size, - .pCode = data.get(), + .codeSize = static_cast<u32>(code.size_bytes()), + .pCode = code.data(), }); } diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index d1d3f3cae..dce34a140 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -4,6 +4,8 @@ #pragma once +#include <span> + #include "common/common_types.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -11,6 +13,6 @@ namespace Vulkan { class VKDevice; -vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code); } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 5d2c4a796..1779a2e30 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <array> #include <cstddef> #include <iterator> @@ -14,7 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) -#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace Vulkan { @@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table; using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { + static constexpr std::array INVALIDATION_FLAGS{ + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, + DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + }; Flags flags{}; - flags[Viewports] = true; - flags[Scissors] = true; - flags[DepthBias] = true; - flags[BlendConstants] = true; - flags[DepthBounds] = true; - flags[StencilProperties] = true; - flags[CullMode] = true; - flags[DepthBoundsEnable] = true; - flags[DepthTestEnable] = true; - flags[DepthWriteEnable] = true; - flags[DepthCompareOp] = true; - flags[FrontFace] = true; - flags[StencilOp] = true; - flags[StencilTestEnable] = true; + for (const int flag : INVALIDATION_FLAGS) { + flags[flag] = true; + } return flags; } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1de789e57..c335d2bdf 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -52,6 +52,14 @@ public: current_topology = INVALID_TOPOLOGY; } + void InvalidateViewports() { + 
flags[Dirty::Viewports] = true; + } + + void InvalidateScissors() { + flags[Dirty::Scissors] = true; + } + bool TouchViewports() { return Exchange(Dirty::Viewports, false); } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 1b59612b9..419cb154d 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -19,6 +19,10 @@ namespace Vulkan { namespace { +constexpr VkBufferUsageFlags BUFFER_USAGE = + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; @@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, - VkBufferUsageFlags usage) +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_) : device{device_}, scheduler{scheduler_} { - CreateBuffers(usage); + CreateBuffers(); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { +std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) { ASSERT(size <= stream_buffer_size); mapped_size = size; @@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); - bool invalidated = false; if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. 
invalidation_mark = current_watch_cursor; @@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { // Ensure that we don't wait for uncommitted fences. scheduler.Flush(); - - invalidated = true; } - return {memory.Map(offset, size), offset, invalidated}; + return std::make_pair(memory.Map(offset, size), offset); } void VKStreamBuffer::Unmap(u64 size) { @@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) { watch.tick = scheduler.CurrentTick(); } -void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { +void VKStreamBuffer::CreateBuffers() { const auto memory_properties = device.GetPhysical().GetMemoryProperties(); const u32 preferred_type = GetMemoryType(memory_properties); const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; @@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { .pNext = nullptr, .flags = 0, .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), - .usage = usage, + .usage = BUFFER_USAGE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 5e15ad78f..1428f77bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -5,7 +5,7 @@ #pragma once #include <optional> -#include <tuple> +#include <utility> #include <vector> #include "common/common_types.h" @@ -19,17 +19,15 @@ class VKScheduler; class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - VkBufferUsageFlags usage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler); ~VKStreamBuffer(); /** * Reserves a region of memory from the stream buffer. * @param size Size to reserve. 
- * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer - * offset and a boolean that's true when buffer has been invalidated. + * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); + std::pair<u8*, u64> Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); @@ -49,7 +47,7 @@ private: }; /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VkBufferUsageFlags usage); + void CreateBuffers(); /// Increases the amount of watches available. void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f2c8f2ae1..261808391 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -4,613 +4,1103 @@ #include <algorithm> #include <array> -#include <cstddef> -#include <cstring> -#include <memory> -#include <variant> +#include <span> #include <vector> -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/morton.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/surface.h" namespace Vulkan { -using 
VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; - +using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; +using Tegra::Texture::TextureMipmapFilter; +using VideoCommon::BufferImageCopy; +using VideoCommon::ImageInfo; +using VideoCommon::ImageType; +using VideoCommon::SubresourceRange; +using VideoCore::Surface::IsPixelFormatASTC; namespace { -VkImageType SurfaceTargetToImage(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { + if (color == std::array<float, 4>{0, 0, 0, 0}) { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } else if (color == std::array<float, 4>{0, 0, 0, 1}) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else if (color == std::array<float, 4>{1, 1, 1, 1}) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } else if (color[3] > 0.5f) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else { + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } +} + +[[nodiscard]] VkImageType ConvertImageType(const ImageType type) { + switch (type) { + case ImageType::e1D: return VK_IMAGE_TYPE_1D; - case 
SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: + case ImageType::e2D: + case ImageType::Linear: return VK_IMAGE_TYPE_2D; - case SurfaceTarget::Texture3D: + case ImageType::e3D: return VK_IMAGE_TYPE_3D; - case SurfaceTarget::TextureBuffer: - UNREACHABLE(); - return {}; + case ImageType::Buffer: + break; } - UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); + UNREACHABLE_MSG("Invalid image type={}", type); return {}; } -VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { - if (pixel_format < PixelFormat::MaxColorFormat) { - return VK_IMAGE_ASPECT_COLOR_BIT; - } else if (pixel_format < PixelFormat::MaxDepthFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT; - } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { - return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } else { - UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format)); - return VK_IMAGE_ASPECT_COLOR_BIT; +[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) { + switch (num_samples) { + case 1: + return VK_SAMPLE_COUNT_1_BIT; + case 2: + return VK_SAMPLE_COUNT_2_BIT; + case 4: + return VK_SAMPLE_COUNT_4_BIT; + case 8: + return VK_SAMPLE_COUNT_8_BIT; + case 16: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return VK_SAMPLE_COUNT_1_BIT; } } -VkImageViewType GetImageViewType(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return VK_IMAGE_VIEW_TYPE_1D; - case SurfaceTarget::Texture2D: - return VK_IMAGE_VIEW_TYPE_2D; - case SurfaceTarget::Texture3D: - return VK_IMAGE_VIEW_TYPE_3D; - case SurfaceTarget::Texture1DArray: - return VK_IMAGE_VIEW_TYPE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return VK_IMAGE_VIEW_TYPE_CUBE; - case 
SurfaceTarget::TextureCubeArray: - return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - case SurfaceTarget::TextureBuffer: - break; +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) { + const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format); + VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (info.type == ImageType::e2D && info.resources.layers >= 6 && + info.size.width == info.size.height) { + flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; } - UNREACHABLE(); - return {}; -} - -vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, - std::size_t host_memory_size) { - // TODO(Rodrigo): Move texture buffer creation to the buffer cache - return device.GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + if (info.type == ImageType::e3D) { + flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; + } + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + if (format_info.attachable) { + switch (VideoCore::Surface::GetFormatType(info.format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + break; + case VideoCore::Surface::SurfaceType::Depth: + case VideoCore::Surface::SurfaceType::DepthStencil: + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + break; + default: + UNREACHABLE_MSG("Invalid surface type"); + } + } + if (format_info.storage) { + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); + return VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, - .flags = 0, - .size = static_cast<VkDeviceSize>(host_memory_size), - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .flags = 
flags, + .imageType = ConvertImageType(info.type), + .format = format_info.format, + .extent = + { + .width = info.size.width >> samples_x, + .height = info.size.height >> samples_y, + .depth = info.size.depth, + }, + .mipLevels = static_cast<u32>(info.resources.levels), + .arrayLayers = static_cast<u32>(info.resources.layers), + .samples = ConvertSampleCount(info.num_samples), + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - }); -} - -VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, - const SurfaceParams& params, VkBuffer buffer, - std::size_t host_memory_size) { - ASSERT(params.IsBuffer()); - - return { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = buffer, - .format = - MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, - .offset = 0, - .range = static_cast<VkDeviceSize>(host_memory_size), + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; } -VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { - ASSERT(!params.IsBuffer()); - - const auto [format, attachable, storage] = - MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); +[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) { + if (info.type == ImageType::Buffer) { + return vk::Image{}; + } + return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); +} - VkImageCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, +[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) { + if (info.type != ImageType::Buffer) { + return vk::Buffer{}; + } + const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); + return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, 
.flags = 0, - .imageType = SurfaceTargetToImage(params.target), - .format = format, - .extent = {}, - .mipLevels = params.num_levels, - .arrayLayers = static_cast<u32>(params.GetNumLayers()), - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .size = info.size.width * bytes_per_block, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - if (attachable) { - ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT - : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } - if (storage) { - ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; - } - - switch (params.target) { - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - [[fallthrough]]; - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture2DArray: - ci.extent = {params.width, params.height, 1}; - break; - case SurfaceTarget::Texture3D: - ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; - ci.extent = {params.width, params.height, params.depth}; - break; - case SurfaceTarget::TextureBuffer: - UNREACHABLE(); - } - - return ci; + }); } -u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { - return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | - (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); +[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { + switch 
(VideoCore::Surface::GetFormatType(format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case VideoCore::Surface::SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + UNREACHABLE_MSG("Invalid surface type"); + return VkImageAspectFlags{}; + } } -} // Anonymous namespace - -CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device}, - memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { - if (params.IsBuffer()) { - buffer = CreateBuffer(device, params, host_memory_size); - commit = memory_manager.Commit(buffer, false); - - const auto buffer_view_ci = - GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); - format = buffer_view_ci.format; - - buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); - } else { - const auto image_ci = GenerateImageCreateInfo(device, params); - format = image_ci.format; - - image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); - commit = memory_manager.Commit(image->GetHandle(), false); +[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) { + if (info.IsRenderTarget()) { + return ImageAspectMask(info.format); } - - // TODO(Rodrigo): Move this to a virtual function. - u32 num_layers = 1; - if (params.is_layered || params.target == SurfaceTarget::Texture3D) { - num_layers = params.depth; + const bool is_first = info.Swizzle()[0] == SwizzleSource::R; + switch (info.format) { + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + return is_first ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; + case PixelFormat::S8_UINT_D24_UNORM: + return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; + case PixelFormat::D16_UNORM: + case PixelFormat::D32_FLOAT: + return VK_IMAGE_ASPECT_DEPTH_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; } - main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); } -CachedSurface::~CachedSurface() = default; - -void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { - // To upload data we have to be outside of a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device, + const ImageView* image_view) { + const auto pixel_format = image_view->format; + return VkAttachmentDescription{ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format, + .samples = image_view->Samples(), + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} - if (params.IsBuffer()) { - UploadBuffer(staging_buffer); - } else { - UploadImage(staging_buffer); +[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { + switch (swizzle) { + case SwizzleSource::Zero: + return VK_COMPONENT_SWIZZLE_ZERO; + case SwizzleSource::R: + return VK_COMPONENT_SWIZZLE_R; + case SwizzleSource::G: + return VK_COMPONENT_SWIZZLE_G; + case SwizzleSource::B: + return VK_COMPONENT_SWIZZLE_B; + case SwizzleSource::A: + return VK_COMPONENT_SWIZZLE_A; + case SwizzleSource::OneFloat: + case SwizzleSource::OneInt: + return VK_COMPONENT_SWIZZLE_ONE; } + UNREACHABLE_MSG("Invalid swizzle={}", swizzle); + return VK_COMPONENT_SWIZZLE_ZERO; } -void 
CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { - UNIMPLEMENTED_IF(params.IsBuffer()); - - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { - LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); +[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { + switch (type) { + case VideoCommon::ImageViewType::e1D: + return VK_IMAGE_VIEW_TYPE_1D; + case VideoCommon::ImageViewType::e2D: + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Cube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case VideoCommon::ImageViewType::e3D: + return VK_IMAGE_VIEW_TYPE_3D; + case VideoCommon::ImageViewType::e1DArray: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VideoCommon::ImageViewType::e2DArray: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VideoCommon::ImageViewType::CubeArray: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case VideoCommon::ImageViewType::Rect: + LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + return VK_IMAGE_VIEW_TYPE_2D; + case VideoCommon::ImageViewType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} - // We can't copy images to buffers inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers( + VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) { + return VkImageSubresourceLayers{ + .aspectMask = aspect_mask, + .mipLevel = static_cast<u32>(subresource.base_level), + .baseArrayLayer = static_cast<u32>(subresource.base_layer), + .layerCount = static_cast<u32>(subresource.num_layers), + }; +} - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); +[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) { + return VkOffset3D{ + .x = 
offset3d.x, + .y = offset3d.y, + .z = offset3d.z, + }; +} - const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - // TODO(Rodrigo): Do this in a single copy - for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle, - copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); - }); - } - scheduler.Finish(); +[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) { + return VkExtent3D{ + .width = static_cast<u32>(extent3d.width), + .height = static_cast<u32>(extent3d.height), + .depth = static_cast<u32>(extent3d.depth), + }; +} - // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. - std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size); +[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy, + VkImageAspectFlags aspect_mask) noexcept { + return VkImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask), + .srcOffset = MakeOffset3D(copy.src_offset), + .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask), + .dstOffset = MakeOffset3D(copy.dst_offset), + .extent = MakeExtent3D(copy.extent), + }; } -void CachedSurface::DecorateSurfaceName() { - // TODO(Rodrigo): Add name decorations +[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( + std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { + std::vector<VkBufferCopy> result(copies.size()); + std::ranges::transform( + copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { + return VkBufferCopy{ + .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset), + .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset), + .size = static_cast<VkDeviceSize>(copy.size), + }; + }); + return result; } -View 
CachedSurface::CreateView(const ViewParams& params) { - // TODO(Rodrigo): Add name decorations - return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params); +[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( + std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { + struct Maker { + VkBufferImageCopy operator()(const BufferImageCopy& copy) const { + return VkBufferImageCopy{ + .bufferOffset = copy.buffer_offset + buffer_offset, + .bufferRowLength = copy.buffer_row_length, + .bufferImageHeight = copy.buffer_image_height, + .imageSubresource = + { + .aspectMask = aspect_mask, + .mipLevel = static_cast<u32>(copy.image_subresource.base_level), + .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer), + .layerCount = static_cast<u32>(copy.image_subresource.num_layers), + }, + .imageOffset = + { + .x = copy.image_offset.x, + .y = copy.image_offset.y, + .z = copy.image_offset.z, + }, + .imageExtent = + { + .width = copy.image_extent.width, + .height = copy.image_extent.height, + .depth = copy.image_extent.depth, + }, + }; + } + size_t buffer_offset; + VkImageAspectFlags aspect_mask; + }; + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + std::vector<VkBufferImageCopy> result(copies.size() * 2); + std::ranges::transform(copies, result.begin(), + Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); + std::ranges::transform(copies, result.begin() + copies.size(), + Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); + return result; + } else { + std::vector<VkBufferImageCopy> result(copies.size()); + std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); + return result; + } } -void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), 
host_memory_size); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask, + const SubresourceRange& range) { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = static_cast<u32>(range.base.level), + .levelCount = static_cast<u32>(range.extent.levels), + .baseArrayLayer = static_cast<u32>(range.base.layer), + .layerCount = static_cast<u32>(range.extent.layers), + }; +} - scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = host_memory_size](vk::CommandBuffer cmdbuf) { - VkBufferCopy copy; - copy.srcOffset = 0; - copy.dstOffset = 0; - copy.size = size; - cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); +[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { + SubresourceRange range = image_view->range; + if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { + // Slice image views always affect a single layer, but their subresource range corresponds + // to the slice. Override the value to affect a single layer. 
+ range.base.layer = 0; + range.extent.layers = 1; + } + return MakeSubresourceRange(ImageAspectMask(image_view->format), range); +} - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = dst_buffer; - barrier.offset = 0; - barrier.size = size; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - 0, {}, barrier, {}); - }); +[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { + return VkImageSubresourceLayers{ + .aspectMask = ImageAspectMask(image_view->format), + .mipLevel = static_cast<u32>(image_view->range.base.level), + .baseArrayLayer = static_cast<u32>(image_view->range.base.layer), + .layerCount = static_cast<u32>(image_view->range.extent.layers), + }; } -void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { - const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); - std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - - FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - for (u32 level = 0; level < params.num_levels; ++level) { - const VkBufferImageCopy copy = GetBufferImageCopy(level); - if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - std::array<VkBufferImageCopy, 2> copies = {copy, copy}; - copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - 
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - copies); - }); - } else { - scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), - copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); - }); - } +[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) { + switch (value) { + case SwizzleSource::G: + return SwizzleSource::R; + default: + return value; } } -VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - return { - .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = +void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, + VkImageAspectFlags aspect_mask, bool is_initialized, + std::span<const VkBufferImageCopy> copies) { + static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = ACCESS_FLAGS, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = is_initialized ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { - .aspectMask = image->GetAspectMask(), - .mipLevel = level, + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, .baseArrayLayer = 0, - .layerCount = static_cast<u32>(params.GetNumLayers()), + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - .imageOffset = {.x = 0, .y = 0, .z = 0}, - .imageExtent = + }; + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = ACCESS_FLAGS, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { - .width = params.GetMipWidth(level), - .height = params.GetMipHeight(level), - .depth = params.target == SurfaceTarget::Texture3D ? 
params.GetMipDepth(level) : 1U, + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); + // TODO: Move this to another API + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + write_barrier); } -VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { - return {image->GetAspectMask(), 0, params.num_levels, 0, - static_cast<u32>(params.GetNumLayers())}; +[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageBlit{ + .srcSubresource = src_layers, + .srcOffsets = + { + { + .x = src_region[0].x, + .y = src_region[0].y, + .z = 0, + }, + { + .x = src_region[1].x, + .y = src_region[1].y, + .z = 1, + }, + }, + .dstSubresource = dst_layers, + .dstOffsets = + { + { + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, + }, + { + .x = dst_region[1].x, + .y = dst_region[1].y, + .z = 1, + }, + }, + }; } -CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params) - : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, - image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, - aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, - base_level{params.base_level}, num_levels{params.num_levels}, - image_view_type{image ? 
GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - base_layer = 0; - num_layers = 1; - base_slice = params.base_layer; - num_slices = params.num_layers; - } else { - base_layer = params.base_layer; - num_layers = params.num_layers; - } +[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + const VkImageSubresourceLayers& dst_layers, + const VkImageSubresourceLayers& src_layers) { + return VkImageResolve{ + .srcSubresource = src_layers, + .srcOffset = + { + .x = src_region[0].x, + .y = src_region[0].y, + .z = 0, + }, + .dstSubresource = dst_layers, + .dstOffset = + { + .x = dst_region[0].x, + .y = dst_region[0].y, + .z = 0, + }, + .extent = + { + .width = static_cast<u32>(dst_region[1].x - dst_region[0].x), + .height = static_cast<u32>(dst_region[1].y - dst_region[0].y), + .depth = 1, + }, + }; } -CachedSurfaceView::~CachedSurfaceView() = default; - -VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { - const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (last_image_view && last_swizzle == new_swizzle) { - return last_image_view; +struct RangedBarrierRange { + u32 min_mip = std::numeric_limits<u32>::max(); + u32 max_mip = std::numeric_limits<u32>::min(); + u32 min_layer = std::numeric_limits<u32>::max(); + u32 max_layer = std::numeric_limits<u32>::min(); + + void AddLayers(const VkImageSubresourceLayers& layers) { + min_mip = std::min(min_mip, layers.mipLevel); + max_mip = std::max(max_mip, layers.mipLevel + 1); + min_layer = std::min(min_layer, layers.baseArrayLayer); + max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); } - last_swizzle = new_swizzle; - const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); - auto& image_view = entry->second; - if 
(!is_cache_miss) { - return last_image_view = *image_view; + VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = min_mip, + .levelCount = max_mip - min_mip, + .baseArrayLayer = min_layer, + .layerCount = max_layer - min_layer, + }; } +}; - std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), - MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { - // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. - std::swap(swizzle[0], swizzle[2]); - } +} // Anonymous namespace - // Games can sample depth or stencil values on textures. This is decided by the swizzle value on - // hardware. To emulate this on Vulkan we specify it in the aspect. - VkImageAspectFlags aspect = aspect_mask; - if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); - const bool is_first = x_source == SwizzleSource::R; - switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: - case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: - aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; - break; - case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: - aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - break; - default: - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; - UNIMPLEMENTED(); - } +void TextureCacheRuntime::Finish() { + scheduler.Finish(); +} - // Make sure we sample the first component - std::transform( - swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { - return component == VK_COMPONENT_SWIZZLE_G ? 
VK_COMPONENT_SWIZZLE_R : component; - }); - } +ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { + const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true); + return ImageBufferMap{ + .handle = *buffer.handle, + .map = buffer.commit->Map(size), + }; +} - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ASSERT(base_slice == 0); - ASSERT(num_slices == params.depth); +void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation) { + const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); + const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT; + const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT; + ASSERT(aspect_mask == ImageAspectMask(dst.format)); + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { + blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, + operation); + return; } - - image_view = device.GetLogical().CreateImageView({ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = image_view_type, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = swizzle[0], - .g = swizzle[1], - .b = swizzle[2], - .a = swizzle[3], + if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!device.IsBlitDepthStencilSupported()) { + UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa); + blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(), + dst_region, src_region, filter, operation); + return; + } + } + ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(!(is_dst_msaa && !is_src_msaa)); + ASSERT(operation == Fermi2D::Operation::SrcCopy); + + const VkImage dst_image = 
dst.ImageHandle(); + const VkImage src_image = src.ImageHandle(); + const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst); + const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src); + const bool is_resolve = is_src_msaa && !is_dst_msaa; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, + aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, }, - .subresourceRange = - { - .aspectMask = aspect, - .baseMipLevel = base_level, - .levelCount = num_levels, - .baseArrayLayer = base_layer, - .layerCount = num_layers, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = 
VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + if (is_resolve) { + cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); + } else { + const bool is_linear = filter == Fermi2D::Filter::Bilinear; + const VkFilter vk_filter = is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; + cmdbuf.BlitImage( + src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter); + } + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); }); - - return last_image_view = *image_view; } -VkImageView CachedSurfaceView::GetAttachment() { - if (render_target) { - return *render_target; +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + switch (dst_view.format) { + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); + } + break; + case PixelFormat::R32_FLOAT: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32ToR32(dst, src_view); + } + break; + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::D32_FLOAT: + if (src_view.format == PixelFormat::R32_FLOAT) { + return blit_image_helper.ConvertR32ToD32(dst, src_view); + } + break; + default: + break; } + UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); +} - VkImageViewCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = surface.GetImageHandle(), - .viewType = VK_IMAGE_VIEW_TYPE_1D, - .format = surface.GetImage().GetFormat(), - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, +void TextureCacheRuntime::CopyImage(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + std::vector<VkImageCopy> vk_copies(copies.size()); + const VkImageAspectFlags aspect_mask = dst.AspectMask(); + 
ASSERT(aspect_mask == src.AspectMask()); + + std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { + return MakeImageCopy(copy, aspect_mask); + }); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkImageCopy& copy : vk_copies) { + dst_range.AddLayers(copy.dstSubresource); + src_range.AddLayers(copy.srcSubresource); + } + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(aspect_mask), }, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = base_level, - .levelCount = num_levels, - .baseArrayLayer = 0, - .layerCount = 0, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), }, - }; - if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { - ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; - ci.subresourceRange.baseArrayLayer = base_slice; - ci.subresourceRange.layerCount = num_slices; + }; + const VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(aspect_mask), + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, read_barriers); + cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); + }); +} + +Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, + VAddr cpu_addr_) + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, + image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + aspect_mask(ImageAspectMask(info.format)) { + if (image) { + commit = runtime.memory_manager.Commit(image, false); } else { - ci.viewType = image_view_type; - ci.subresourceRange.baseArrayLayer = base_layer; - 
ci.subresourceRange.layerCount = num_layers; + commit = runtime.memory_manager.Commit(buffer, false); + } + if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { + flags |= VideoCommon::ImageFlagBits::Converted; + } + if (runtime.device.HasDebuggingToolAttached()) { + if (image) { + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } else { + buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + } } - render_target = device.GetLogical().CreateImageView(ci); - return *render_target; } -VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, const VKDevice& device_, - VKMemoryManager& memory_manager_, VKScheduler& scheduler_, - VKStagingBufferPool& staging_pool_) - : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()), - device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ - staging_pool_} {} - -VKTextureCache::~VKTextureCache() = default; - -Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, - gpu_addr, params); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const BufferImageCopy> copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + const VkBuffer src_buffer = map.handle; + const VkImage vk_image = *image; + const VkImageAspectFlags vk_aspect_mask = aspect_mask; + const bool is_initialized = std::exchange(initialized, true); + scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, + vk_copies](vk::CommandBuffer cmdbuf) { + CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); + }); } -void 
VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) { - const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; - UNIMPLEMENTED_IF(src_3d); +void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferCopy> copies) { + // TODO: Move this to another API + scheduler->RequestOutsideRenderPassOperationContext(); + std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); + const VkBuffer src_buffer = map.handle; + const VkBuffer dst_buffer = *buffer; + scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); +} - // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and - // dimension respectively. - const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; - const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; +void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const BufferImageCopy> copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); + scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, + vk_copies](vk::CommandBuffer cmdbuf) { + // TODO: Barriers + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); + }); +} - const u32 extent_z = dst_3d ? copy_params.depth : 1; - const u32 num_layers = dst_3d ? 
1 : copy_params.depth; +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image) + : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, + image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( + image.info.num_samples)} { + const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); + std::array<SwizzleSource, 4> swizzle{ + SwizzleSource::R, + SwizzleSource::G, + SwizzleSource::B, + SwizzleSource::A, + }; + if (!info.IsRenderTarget()) { + swizzle = info.Swizzle(); + if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { + std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); + } + } + const VkFormat vk_format = + MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format; + const VkImageViewCreateInfo create_info{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image.Handle(), + .viewType = VkImageViewType{}, + .format = vk_format, + .components{ + .r = ComponentSwizzle(swizzle[0]), + .g = ComponentSwizzle(swizzle[1]), + .b = ComponentSwizzle(swizzle[2]), + .a = ComponentSwizzle(swizzle[3]), + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), + }; + const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { + VkImageViewCreateInfo ci{create_info}; + ci.viewType = ImageViewType(view_type); + if (num_layers) { + ci.subresourceRange.layerCount = *num_layers; + } + vk::ImageView handle = device->GetLogical().CreateImageView(ci); + if (device->HasDebuggingToolAttached()) { + handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + } + image_views[static_cast<size_t>(view_type)] = std::move(handle); + }; + switch (info.type) { + case VideoCommon::ImageViewType::e1D: + case VideoCommon::ImageViewType::e1DArray: + create(VideoCommon::ImageViewType::e1D, 1); 
+ create(VideoCommon::ImageViewType::e1DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e1DArray); + break; + case VideoCommon::ImageViewType::e2D: + case VideoCommon::ImageViewType::e2DArray: + create(VideoCommon::ImageViewType::e2D, 1); + create(VideoCommon::ImageViewType::e2DArray, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e2DArray); + break; + case VideoCommon::ImageViewType::e3D: + create(VideoCommon::ImageViewType::e3D, std::nullopt); + render_target = Handle(VideoCommon::ImageViewType::e3D); + break; + case VideoCommon::ImageViewType::Cube: + case VideoCommon::ImageViewType::CubeArray: + create(VideoCommon::ImageViewType::Cube, 6); + create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + break; + case VideoCommon::ImageViewType::Rect: + UNIMPLEMENTED(); + break; + case VideoCommon::ImageViewType::Buffer: + buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = image.Buffer(), + .format = vk_format, + .offset = 0, // TODO: Redesign buffer cache to support this + .range = image.guest_size_bytes, + }); + break; + } +} - // We can't copy inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) + : VideoCommon::ImageViewBase{params} {} - src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); +VkImageView ImageView::DepthView() { + if (depth_view) { + return *depth_view; + } + depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + return 
*depth_view; +} - const VkImageCopy copy{ - .srcSubresource = - { - .aspectMask = src_surface->GetAspectMask(), - .mipLevel = copy_params.source_level, - .baseArrayLayer = copy_params.source_z, - .layerCount = num_layers, - }, - .srcOffset = - { - .x = static_cast<s32>(copy_params.source_x), - .y = static_cast<s32>(copy_params.source_y), - .z = 0, - }, - .dstSubresource = - { - .aspectMask = dst_surface->GetAspectMask(), - .mipLevel = copy_params.dest_level, - .baseArrayLayer = dst_base_layer, - .layerCount = num_layers, - }, - .dstOffset = - { - .x = static_cast<s32>(copy_params.dest_x), - .y = static_cast<s32>(copy_params.dest_y), - .z = static_cast<s32>(dst_offset_z), - }, - .extent = - { - .width = copy_params.width, - .height = copy_params.height, - .depth = extent_z, - }, - }; +VkImageView ImageView::StencilView() { + if (stencil_view) { + return *stencil_view; + } + stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + return *stencil_view; +} - const VkImage src_image = src_surface->GetImageHandle(); - const VkImage dst_image = dst_surface->GetImageHandle(); - scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); +vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { + return device->GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = image_handle, + .viewType = ImageViewType(type), + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format, + .components{ + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = MakeSubresourceRange(aspect_mask, range), }); } -void VKTextureCache::ImageBlit(View& src_view, View& dst_view, - const 
Tegra::Engines::Fermi2D::Config& copy_config) { - // We can't blit inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_READ_BIT); - dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT); - - VkImageBlit blit; - blit.srcSubresource = src_view->GetImageSubresourceLayers(); - blit.srcOffsets[0].x = copy_config.src_rect.left; - blit.srcOffsets[0].y = copy_config.src_rect.top; - blit.srcOffsets[0].z = 0; - blit.srcOffsets[1].x = copy_config.src_rect.right; - blit.srcOffsets[1].y = copy_config.src_rect.bottom; - blit.srcOffsets[1].z = 1; - blit.dstSubresource = dst_view->GetImageSubresourceLayers(); - blit.dstOffsets[0].x = copy_config.dst_rect.left; - blit.dstOffsets[0].y = copy_config.dst_rect.top; - blit.dstOffsets[0].z = 0; - blit.dstOffsets[1].x = copy_config.dst_rect.right; - blit.dstOffsets[1].y = copy_config.dst_rect.bottom; - blit.dstOffsets[1].z = 1; - - const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - - scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, - is_linear](vk::CommandBuffer cmdbuf) { - cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, - is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); +Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { + const auto& device = runtime.device; + const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported(); + const std::array<float, 4> color = tsc.BorderColor(); + // C++20 bit_cast + VkClearColorValue border_color; + std::memcpy(&border_color, &color, sizeof(color)); + const VkSamplerCustomBorderColorCreateInfoEXT border_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .customBorderColor = border_color, + .format = VK_FORMAT_UNDEFINED, + }; + const void* pnext = nullptr; + if (arbitrary_borders) { + pnext = &border_ci; + } + const VkSamplerReductionModeCreateInfoEXT reduction_ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, + .pNext = pnext, + .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter), + }; + if (runtime.device.IsExtSamplerFilterMinmaxSupported()) { + pnext = &reduction_ci; + } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) { + LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); + } + // Some games have samplers with garbage. Sanitize them here. 
+ const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = max_anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, }); } -void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { - // Currently unimplemented. PBO copies should be dropped and we should use a render pass to - // convert from color to depth and viceversa. 
- LOG_WARNING(Render_Vulkan, "Unimplemented"); +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + std::vector<VkAttachmentDescription> descriptions; + std::vector<VkImageView> attachments; + RenderPassKey renderpass_key{}; + s32 num_layers = 1; + + for (size_t index = 0; index < NUM_RT; ++index) { + const ImageView* const color_buffer = color_buffers[index]; + if (!color_buffer) { + renderpass_key.color_formats[index] = PixelFormat::Invalid; + continue; + } + descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); + attachments.push_back(color_buffer->RenderTarget()); + renderpass_key.color_formats[index] = color_buffer->format; + num_layers = std::max(num_layers, color_buffer->range.extent.layers); + images[num_images] = color_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(color_buffer); + samples = color_buffer->Samples(); + ++num_images; + } + const size_t num_colors = attachments.size(); + const VkAttachmentReference* depth_attachment = + depth_buffer ? 
&ATTACHMENT_REFERENCES[num_colors] : nullptr; + if (depth_buffer) { + descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); + attachments.push_back(depth_buffer->RenderTarget()); + renderpass_key.depth_format = depth_buffer->format; + num_layers = std::max(num_layers, depth_buffer->range.extent.layers); + images[num_images] = depth_buffer->ImageHandle(); + image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + samples = depth_buffer->Samples(); + ++num_images; + } else { + renderpass_key.depth_format = PixelFormat::Invalid; + } + renderpass_key.samples = samples; + + const auto& device = runtime.device.GetLogical(); + const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); + if (is_new) { + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(num_colors), + .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + } + renderpass = *cache_pair->second; + render_area = VkExtent2D{ + .width = key.size.width, + .height = key.size.height, + }; + num_color_buffers = static_cast<u32>(num_colors); + framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = renderpass, + .attachmentCount = static_cast<u32>(attachments.size()), + .pAttachments = attachments.data(), + .width = key.size.width, + .height = key.size.height, + .layers = static_cast<u32>(num_layers), + }); + if (runtime.device.HasDebuggingToolAttached()) { + framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 39202feba..edc3d80c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -4,216 +4,265 @@ #pragma once -#include <memory> -#include <unordered_map> +#include <compare> +#include <span> -#include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" -#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace 
VideoCore { -class RasterizerInterface; -} - namespace Vulkan { -class RasterizerVulkan; +using VideoCommon::ImageId; +using VideoCommon::NUM_RT; +using VideoCommon::Offset2D; +using VideoCommon::RenderTargets; +using VideoCore::Surface::PixelFormat; + class VKDevice; class VKScheduler; class VKStagingBufferPool; -class CachedSurfaceView; -class CachedSurface; +class BlitImageHelper; +class Image; +class ImageView; +class Framebuffer; -using Surface = std::shared_ptr<CachedSurface>; -using View = std::shared_ptr<CachedSurfaceView>; -using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; +struct RenderPassKey { + constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; -using VideoCommon::SurfaceParams; -using VideoCommon::ViewParams; + std::array<PixelFormat, NUM_RT> color_formats; + PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; -class CachedSurface final : public VideoCommon::SurfaceBase<View> { - friend CachedSurfaceView; +} // namespace Vulkan -public: - explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool, - GPUVAddr gpu_addr, const SurfaceParams& params); - ~CachedSurface(); +namespace std { +template <> +struct hash<Vulkan::RenderPassKey> { + [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast<size_t>(key.depth_format) << 48; + value ^= static_cast<size_t>(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std - void UploadTexture(const std::vector<u8>& staging_buffer) override; - void DownloadTexture(std::vector<u8>& staging_buffer) override; +namespace Vulkan { - void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(0, 
static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, - new_stage_mask, new_access, new_layout); +struct ImageBufferMap { + [[nodiscard]] VkBuffer Handle() const noexcept { + return handle; } - void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, - VkImageLayout new_layout) { - image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); + [[nodiscard]] std::span<u8> Span() const noexcept { + return map.Span(); } - VKImage& GetImage() { - return *image; - } + VkBuffer handle; + MemoryMap map; +}; - const VKImage& GetImage() const { - return *image; - } +struct TextureCacheRuntime { + const VKDevice& device; + VKScheduler& scheduler; + VKMemoryManager& memory_manager; + VKStagingBufferPool& staging_buffer_pool; + BlitImageHelper& blit_image_helper; + std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache; + + void Finish(); - VkImage GetImageHandle() const { - return *image->GetHandle(); + [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); + + [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) { + // TODO: Have a special function for this + return MapUploadBuffer(size); } - VkImageAspectFlags GetAspectMask() const { - return image->GetAspectMask(); + void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, + const std::array<Offset2D, 2>& dst_region, + const std::array<Offset2D, 2>& src_region, + Tegra::Engines::Fermi2D::Filter filter, + Tegra::Engines::Fermi2D::Operation operation); + + void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + + [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + return false; } - VkBufferView GetBufferViewHandle() const { - return *buffer_view; + void AccelerateImageUpload(Image&, const ImageBufferMap&, 
size_t, + std::span<const VideoCommon::SwizzleParameters>) { + UNREACHABLE(); } -protected: - void DecorateSurfaceName(); + void InsertUploadMemoryBarrier() {} +}; - View CreateView(const ViewParams& params) override; +class Image : public VideoCommon::ImageBase { +public: + explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, + VAddr cpu_addr); -private: - void UploadBuffer(const std::vector<u8>& staging_buffer); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies); - void UploadImage(const std::vector<u8>& staging_buffer); + void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferCopy> copies); - VkBufferImageCopy GetBufferImageCopy(u32 level) const; + void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies); - VkImageSubresourceRange GetImageSubresourceRange() const; + [[nodiscard]] VkImage Handle() const noexcept { + return *image; + } - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + [[nodiscard]] VkBuffer Buffer() const noexcept { + return *buffer; + } + + [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { + return aspect_mask; + } - std::optional<VKImage> image; +private: + VKScheduler* scheduler; + vk::Image image; vk::Buffer buffer; - vk::BufferView buffer_view; VKMemoryCommit commit; - - VkFormat format = VK_FORMAT_UNDEFINED; + VkImageAspectFlags aspect_mask = 0; + bool initialized = false; }; -class CachedSurfaceView final : public VideoCommon::ViewBase { +class ImageView : public VideoCommon::ImageViewBase { public: - explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, - const ViewParams& params); - ~CachedSurfaceView(); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + 
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + [[nodiscard]] VkImageView DepthView(); - VkImageView GetAttachment(); + [[nodiscard]] VkImageView StencilView(); - bool IsSameSurface(const CachedSurfaceView& rhs) const { - return &surface == &rhs.surface; + [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { + return *image_views[static_cast<size_t>(query_type)]; } - u32 GetWidth() const { - return params.GetMipWidth(base_level); + [[nodiscard]] VkBufferView BufferView() const noexcept { + return *buffer_view; } - u32 GetHeight() const { - return params.GetMipHeight(base_level); + [[nodiscard]] VkImage ImageHandle() const noexcept { + return image_handle; } - u32 GetNumLayers() const { - return num_layers; + [[nodiscard]] VkImageView RenderTarget() const noexcept { + return render_target; } - bool IsBufferView() const { - return buffer_view; + [[nodiscard]] PixelFormat ImageFormat() const noexcept { + return image_format; } - VkImage GetImage() const { - return image; + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; } - VkBufferView GetBufferView() const { - return buffer_view; - } +private: + [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); - VkImageSubresourceRange GetImageSubresourceRange() const { - return {aspect_mask, base_level, num_levels, base_layer, num_layers}; - } + const VKDevice* device = nullptr; + std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; + vk::ImageView depth_view; + vk::ImageView stencil_view; + vk::BufferView buffer_view; + VkImage image_handle = VK_NULL_HANDLE; + VkImageView render_target = VK_NULL_HANDLE; + PixelFormat image_format = PixelFormat::Invalid; + VkSampleCountFlagBits samples = 
VK_SAMPLE_COUNT_1_BIT; +}; - VkImageSubresourceLayers GetImageSubresourceLayers() const { - return {surface.GetAspectMask(), base_level, base_layer, num_layers}; - } +class ImageAlloc : public VideoCommon::ImageAllocBase {}; - void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, - VkAccessFlags new_access) const { - surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, - new_access, new_layout); - } +class Sampler { +public: + explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - void MarkAsModified(u64 tick) { - surface.MarkAsModified(true, tick); + [[nodiscard]] VkSampler Handle() const noexcept { + return *sampler; } private: - // Store a copy of these values to avoid double dereference when reading them - const SurfaceParams params; - const VkImage image; - const VkBufferView buffer_view; - const VkImageAspectFlags aspect_mask; - - const VKDevice& device; - CachedSurface& surface; - const u32 base_level; - const u32 num_levels; - const VkImageViewType image_view_type; - u32 base_layer = 0; - u32 num_layers = 0; - u32 base_slice = 0; - u32 num_slices = 0; - - VkImageView last_image_view = nullptr; - u32 last_swizzle = 0; - - vk::ImageView render_target; - std::unordered_map<u32, vk::ImageView> view_cache; + vk::Sampler sampler; }; -class VKTextureCache final : public TextureCacheBase { +class Framebuffer { public: - explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool); - ~VKTextureCache(); + explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); -private: - Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + [[nodiscard]] VkFramebuffer Handle() const 
noexcept { + return *framebuffer; + } - void ImageCopy(Surface& src_surface, Surface& dst_surface, - const VideoCommon::CopyParams& copy_params) override; + [[nodiscard]] VkRenderPass RenderPass() const noexcept { + return renderpass; + } - void ImageBlit(View& src_view, View& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) override; + [[nodiscard]] VkExtent2D RenderArea() const noexcept { + return render_area; + } - void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } - const VKDevice& device; - VKMemoryManager& memory_manager; - VKScheduler& scheduler; - VKStagingBufferPool& staging_pool; + [[nodiscard]] u32 NumColorBuffers() const noexcept { + return num_color_buffers; + } + + [[nodiscard]] u32 NumImages() const noexcept { + return num_images; + } + + [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept { + return images; + } + + [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept { + return image_ranges; + } + +private: + vk::Framebuffer framebuffer; + VkRenderPass renderpass{}; + VkExtent2D render_area{}; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + u32 num_color_buffers = 0; + u32 num_images = 0; + std::array<VkImage, 9> images{}; + std::array<VkImageSubresourceRange, 9> image_ranges{}; +}; + +struct TextureCacheParams { + static constexpr bool ENABLE_VALIDATION = true; + static constexpr bool FRAMEBUFFER_BLITS = false; + static constexpr bool HAS_EMULATED_COPIES = false; + + using Runtime = Vulkan::TextureCacheRuntime; + using Image = Vulkan::Image; + using ImageAlloc = Vulkan::ImageAlloc; + using ImageView = Vulkan::ImageView; + using Sampler = Vulkan::Sampler; + using Framebuffer = Vulkan::Framebuffer; }; +using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp 
b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 351c048d2..8826da325 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -14,8 +14,8 @@ namespace Vulkan { -VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) - : device{device}, scheduler{scheduler} {} +VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_) + : device{device_}, scheduler{scheduler_} {} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 945320c72..f098a8540 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class VKUpdateDescriptorQueue final { public: - explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); + explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_); ~VKUpdateDescriptorQueue(); void TickFrame(); @@ -40,30 +40,34 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); - void AddSampledImage(VkSampler sampler, VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); + void AddSampledImage(VkImageView image_view, VkSampler sampler) { + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); + payload.emplace_back(VkDescriptorImageInfo{ + .sampler = VK_NULL_HANDLE, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }); } - void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { - 
payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { + payload.emplace_back(VkDescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size, + }); } void AddTexelBuffer(VkBufferView texel_buffer) { payload.emplace_back(texel_buffer); } - VkImageLayout* LastImageLayout() { - return &payload.back().image.imageLayout; - } - - const VkImageLayout* LastImageLayout() const { - return &payload.back().image.imageLayout; - } - private: const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 4e83303d8..2a21e850d 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdBeginQuery); X(vkCmdBeginRenderPass); X(vkCmdBeginTransformFeedbackEXT); + X(vkCmdBeginDebugUtilsLabelEXT); X(vkCmdBindDescriptorSets); X(vkCmdBindIndexBuffer); X(vkCmdBindPipeline); @@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdEndQuery); X(vkCmdEndRenderPass); X(vkCmdEndTransformFeedbackEXT); + X(vkCmdEndDebugUtilsLabelEXT); X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); @@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); @@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkQueueSubmit); X(vkResetFences); X(vkResetQueryPoolEXT); + X(vkSetDebugUtilsObjectNameEXT); + X(vkSetDebugUtilsObjectTagEXT); X(vkUnmapMemory); X(vkUpdateDescriptorSetWithTemplateKHR); X(vkUpdateDescriptorSets); @@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { #undef X } +template <typename T> +void 
SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type, + const char* name) { + const VkDebugUtilsObjectNameInfoEXT name_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .pNext = nullptr, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = reinterpret_cast<u64>(handle), + .pObjectName = name, + }; + Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info)); +} + } // Anonymous namespace bool Load(InstanceDispatch& dld) noexcept { @@ -417,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe } Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, - InstanceDispatch& dld) noexcept { + InstanceDispatch& dispatch) noexcept { const VkApplicationInfo application_info{ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = nullptr, @@ -439,17 +457,17 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char }; VkInstance instance; - if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { + if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { // Failed to create the instance. return {}; } - if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) { + if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { // We successfully created an instance but the destroy function couldn't be loaded. // This is a good moment to panic. 
return {}; } - return Instance(instance, dld); + return Instance(instance, dispatch); } std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { @@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback( VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, .pfnUserCallback = callback, .pUserData = nullptr, }; @@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); } +void Buffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); +} + +void BufferView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); +} + void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindImageMemory(owner, handle, memory, offset)); } +void Image::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); +} + +void ImageView::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); +} + +void DeviceMemory::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); +} + +void Fence::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name); +} + +void Framebuffer::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name); +} + DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { const std::size_t num = ai.descriptorSetCount; std::unique_ptr sets = 
std::make_unique<VkDescriptorSet[]>(num); @@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } } +void DescriptorPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name); +} + CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { const VkCommandBufferAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, @@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev } } +void CommandPool::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name); +} + std::vector<VkImage> SwapchainKHR::GetImages() const { u32 num; Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); @@ -538,9 +591,21 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { return images; } +void Event::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name); +} + +void ShaderModule::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); +} + +void Semaphore::SetObjectNameEXT(const char* name) const { + SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); +} + Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, - DeviceDispatch& dld) noexcept { + DeviceDispatch& dispatch) noexcept { const VkDeviceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = next, @@ -555,11 +620,11 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate }; VkDevice device; - if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { + if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { return {}; } - Load(device, dld); 
- return Device(device, dld); + Load(device, dispatch); + return Device(device, dispatch); } Queue Device::GetQueue(u32 family_index) const noexcept { diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index f64919623..f9a184e00 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -9,6 +9,7 @@ #include <limits> #include <memory> #include <optional> +#include <span> #include <type_traits> #include <utility> #include <vector> @@ -18,6 +19,10 @@ #include "common/common_types.h" +#ifdef _MSC_VER +#pragma warning(disable : 26812) // Disable prefer enum class over enum +#endif + namespace Vulkan::vk { /** @@ -41,6 +46,9 @@ public: /// Construct an empty span. constexpr Span() noexcept = default; + /// Construct an empty span + constexpr Span(std::nullptr_t) noexcept {} + /// Construct a span from a single element. constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} @@ -52,7 +60,7 @@ public: /// Construct a span from a pointer and a size. /// This is inteded for subranges. - constexpr Span(const T* ptr, std::size_t num) noexcept : ptr{ptr}, num{num} {} + constexpr Span(const T* ptr_, std::size_t num_) noexcept : ptr{ptr_}, num{num_} {} /// Returns the data pointer by the span. 
constexpr const T* data() const noexcept { @@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdBeginQuery vkCmdBeginQuery; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; PFN_vkCmdBindPipeline vkCmdBindPipeline; @@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdEndQuery vkCmdEndQuery; PFN_vkCmdEndRenderPass vkCmdEndRenderPass; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT; PFN_vkCmdFillBuffer vkCmdFillBuffer; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; PFN_vkCmdPushConstants vkCmdPushConstants; @@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; + PFN_vkCmdResolveImage vkCmdResolveImage; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT; + PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT; PFN_vkUnmapMemory vkUnmapMemory; PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; @@ -469,9 +482,10 @@ public: PoolAllocations() = default; /// Construct an allocation. Errors are reported through IsOutOfPoolMemory(). 
- explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations, std::size_t num, - VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept - : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {} + explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations_, std::size_t num_, + VkDevice device_, PoolType pool_, const DeviceDispatch& dld_) noexcept + : allocations{std::move(allocations_)}, num{num_}, device{device_}, pool{pool_}, + dld{&dld_} {} /// Copying Vulkan allocations is not supported and will never be. PoolAllocations(const PoolAllocations&) = delete; @@ -541,18 +555,14 @@ private: const DeviceDispatch* dld = nullptr; }; -using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>; using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; -using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>; -using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>; using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; -using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; @@ -565,7 +575,7 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> { public: /// Creates a Vulkan instance. Use "operator bool" for error handling. 
static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, - InstanceDispatch& dld) noexcept; + InstanceDispatch& dispatch) noexcept; /// Enumerates physical devices. /// @return Physical devices and an empty handle on failure. @@ -581,7 +591,8 @@ public: constexpr Queue() noexcept = default; /// Construct a queue handle. - constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} + constexpr Queue(VkQueue queue_, const DeviceDispatch& dld_) noexcept + : queue{queue_}, dld{&dld_} {} VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence = VK_NULL_HANDLE) const noexcept { @@ -603,6 +614,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { + using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { @@ -611,12 +633,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { public: /// Attaches a memory allocation. void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + +class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { + using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; public: + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { void* data; Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); @@ -632,6 +668,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { return dld->vkWaitForFences(owner, 1, &handle, true, timeout); } @@ -645,11 +684,22 @@ public: } }; +class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> { + using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; public: DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { @@ -658,6 +708,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { public: CommandBuffers Allocate(std::size_t num_buffers, VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; + + /// Set object name. + void SetObjectNameEXT(const char* name) const; }; class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { @@ -671,15 +724,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; public: + /// Set object name. 
+ void SetObjectNameEXT(const char* name) const; + VkResult GetStatus() const noexcept { return dld->vkGetEventStatus(owner, handle); } }; +class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> { + using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle; + +public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; +}; + class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; public: + /// Set object name. + void SetObjectNameEXT(const char* name) const; + [[nodiscard]] u64 GetCounter() const { u64 value; Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); @@ -720,7 +787,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { public: static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, - DeviceDispatch& dld) noexcept; + DeviceDispatch& dispatch) noexcept; Queue GetQueue(u32 family_index) const noexcept; @@ -809,8 +876,9 @@ class PhysicalDevice { public: constexpr PhysicalDevice() noexcept = default; - constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept - : physical_device{physical_device}, dld{&dld} {} + constexpr PhysicalDevice(VkPhysicalDevice physical_device_, + const InstanceDispatch& dld_) noexcept + : physical_device{physical_device_}, dld{&dld_} {} constexpr operator VkPhysicalDevice() const noexcept { return physical_device; @@ -849,8 +917,8 @@ class CommandBuffer { public: CommandBuffer() noexcept = default; - explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept - : handle{handle}, dld{&dld} {} + explicit CommandBuffer(VkCommandBuffer handle_, const DeviceDispatch& dld_) noexcept + : handle{handle_}, dld{&dld_} {} const VkCommandBuffer* address() const noexcept { return &handle; @@ -929,6 +997,12 @@ public: regions.data(), 
filter); } + void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span<VkImageResolve> regions) { + dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + void Dispatch(u32 x, u32 y, u32 z) const noexcept { dld->vkCmdDispatch(handle, x, y, z); } @@ -943,6 +1017,23 @@ public: image_barriers.size(), image_barriers.data()); } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags = 0) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkBufferMemoryBarrier& buffer_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + const VkImageMemoryBarrier& image_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier); + } + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, Span<VkBufferImageCopy> regions) const noexcept { dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), @@ -976,6 +1067,13 @@ public: dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); } + template <typename T> + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, + const T& data) const noexcept { + static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable"); + dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data); + } + void SetViewport(u32 first, Span<VkViewport> viewports) const 
noexcept { dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); } @@ -1085,6 +1183,20 @@ public: counter_buffers, counter_buffer_offsets); } + void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { + const VkDebugUtilsLabelEXT label_info{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pNext = nullptr, + .pLabelName = label, + .color{color[0], color[1], color[2], color[3]}, + }; + dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + } + + void EndDebugUtilsLabelEXT() const noexcept { + dld->vkCmdEndDebugUtilsLabelEXT(handle); + } + private: VkCommandBuffer handle; const DeviceDispatch* dld; diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp deleted file mode 100644 index 53c7ef12d..000000000 --- a/src/video_core/sampler_cache.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/sampler_cache.h" - -namespace VideoCommon { - -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast<std::size_t>( - Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - -} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h deleted file mode 100644 index cbe3ad071..000000000 --- a/src/video_core/sampler_cache.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <cstddef> -#include <unordered_map> - -#include "video_core/textures/texture.h" - -namespace VideoCommon { - -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::SamplerCacheKey> { - std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace VideoCommon { - -template <typename SamplerType, typename SamplerStorageType> -class SamplerCache { -public: - SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return ToSamplerType(sampler); - } - -protected: - virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; - - virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; - -private: - std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; -}; - -} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 3f96d9076..db11144c7 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -212,16 +212,15 @@ public: } void operator()(const ExprPredicate& expr) { - inner += "P" + std::to_string(expr.predicate); + inner += fmt::format("P{}", expr.predicate); } void operator()(const ExprCondCode& expr) { - u32 cc = static_cast<u32>(expr.cc); - inner += "CC" + std::to_string(cc); + inner += fmt::format("CC{}", expr.cc); } void operator()(const ExprVar& expr) { - inner += "V" + std::to_string(expr.var_index); + inner += fmt::format("V{}", expr.var_index); } void operator()(const ExprBoolean& expr) { @@ -229,7 +228,7 @@ public: } void operator()(const ExprGprEqual& expr) { - inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); } const std::string& GetResult() const { @@ -374,8 +373,8 @@ std::string ASTManager::Print() const { return printer.GetResult(); } -ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) - : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}; +ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) + : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} ASTManager::~ASTManager() { Clear(); diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index 8e5a22ab3..dc49b369e 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -76,7 +76,7 @@ public: class ASTIfThen { public: - explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {} + explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} Expr condition; ASTZipper nodes{}; }; @@ -88,63 +88,68 @@ public: class ASTBlockEncoded { public: - explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, 
end{end} {} + explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} u32 start; u32 end; }; class ASTBlockDecoded { public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {} + explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} NodeBlock nodes; }; class ASTVarSet { public: - explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {} + explicit ASTVarSet(u32 index_, Expr condition_) + : index{index_}, condition{std::move(condition_)} {} + u32 index; Expr condition; }; class ASTLabel { public: - explicit ASTLabel(u32 index) : index{index} {} + explicit ASTLabel(u32 index_) : index{index_} {} u32 index; bool unused{}; }; class ASTGoto { public: - explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {} + explicit ASTGoto(Expr condition_, u32 label_) + : condition{std::move(condition_)}, label{label_} {} + Expr condition; u32 label; }; class ASTDoWhile { public: - explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {} + explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} Expr condition; ASTZipper nodes{}; }; class ASTReturn { public: - explicit ASTReturn(Expr condition, bool kills) - : condition{std::move(condition)}, kills{kills} {} + explicit ASTReturn(Expr condition_, bool kills_) + : condition{std::move(condition_)}, kills{kills_} {} + Expr condition; bool kills; }; class ASTBreak { public: - explicit ASTBreak(Expr condition) : condition{std::move(condition)} {} + explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} Expr condition; }; class ASTBase { public: - explicit ASTBase(ASTNode parent, ASTData data) - : data{std::move(data)}, parent{std::move(parent)} {} + explicit ASTBase(ASTNode parent_, ASTData data_) + : data{std::move(data_)}, parent{std::move(parent_)} {} template <class U, class... Args> static ASTNode Make(ASTNode parent, Args&&... 
args) { @@ -300,7 +305,7 @@ private: class ASTManager final { public: - ASTManager(bool full_decompile, bool disable_else_derivation); + explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); ~ASTManager(); ASTManager(const ASTManager& o) = delete; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 6920afdf2..09f93463b 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -13,7 +13,7 @@ namespace VideoCommon::Shader { -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} AsyncShaders::~AsyncShaders() { KillWorkers(); @@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector<VkDescriptorSetLayoutBinding> bindings, Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key) { + Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { std::unique_lock lock(queue_mutex); pending_queue.push({ .backend = Backend::Vulkan, @@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .scheduler = &scheduler, .descriptor_pool = &descriptor_pool, .update_descriptor_queue = &update_descriptor_queue, - .renderpass_cache = &renderpass_cache, .bindings = std::move(bindings), .program = std::move(program), .key = key, + .num_color_buffers = num_color_buffers, }); cv.notify_one(); } @@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context } else if (work.backend == Backend::Vulkan) { auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( *work.vk_device, *work.scheduler, 
*work.descriptor_pool, - *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, - work.program); + *work.update_descriptor_queue, work.key, work.bindings, work.program, + work.num_color_buffers); work.pp_cache->EmplacePipeline(std::move(pipeline)); } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 7a99e1dc5..004e214a8 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -66,7 +66,7 @@ public: Tegra::Engines::ShaderType shader_type; }; - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); ~AsyncShaders(); /// Start up shader worker threads @@ -98,9 +98,9 @@ public: Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool, Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - Vulkan::VKRenderPassCache& renderpass_cache, std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, + u32 num_color_buffers); private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -127,10 +127,10 @@ private: Vulkan::VKScheduler* scheduler; Vulkan::VKDescriptorPool* descriptor_pool; Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - Vulkan::VKRenderPassCache* renderpass_cache; std::vector<VkDescriptorSetLayoutBinding> bindings; Vulkan::SPIRVProgram program; Vulkan::GraphicsPipelineCacheKey key; + u32 num_color_buffers; }; std::condition_variable cv; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 4c8971615..43d965f2f 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -66,8 +66,8 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& 
registry) - : program_code{program_code}, registry{registry}, start{start} {} + explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) + : program_code{program_code_}, registry{registry_}, start{start_} {} const ProgramCode& program_code; Registry& registry; @@ -241,10 +241,10 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) ParseInfo parse_info{}; SingleBranch single_branch{}; - const auto insert_label = [](CFGRebuildState& state, u32 address) { - const auto pair = state.labels.emplace(address); + const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { + const auto pair = rebuild_state.labels.emplace(label_address); if (pair.second) { - state.inspect_queries.push_back(address); + rebuild_state.inspect_queries.push_back(label_address); } }; @@ -257,7 +257,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) single_branch.ignore = false; break; } - if (state.registered.count(offset) != 0) { + if (state.registered.contains(offset)) { single_branch.address = offset; single_branch.ignore = true; break; @@ -632,12 +632,12 @@ void DecompileShader(CFGRebuildState& state) { for (auto label : state.labels) { state.manager->DeclareLabel(label); } - for (auto& block : state.block_info) { - if (state.labels.count(block.start) != 0) { + for (const auto& block : state.block_info) { + if (state.labels.contains(block.start)) { state.manager->InsertLabel(block.start); } const bool ignore = BlockBranchIsIgnored(block.branch); - u32 end = ignore ? block.end + 1 : block.end; + const u32 end = ignore ? 
block.end + 1 : block.end; state.manager->InsertBlock(block.start, end); if (!ignore) { InsertBranch(*state.manager, block.branch); @@ -737,7 +737,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, auto back = result_out->blocks.begin(); auto next = std::next(back); while (next != result_out->blocks.end()) { - if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { + if (!state.labels.contains(next->start) && next->start == back->end + 1) { back->end = next->end; next = result_out->blocks.erase(next); continue; diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 62a3510d8..37bf96492 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -42,10 +42,10 @@ struct Condition { class SingleBranch { public: SingleBranch() = default; - SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, - bool ignore) - : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, - ignore{ignore} {} + explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, + bool is_brk_, bool ignore_) + : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, + ignore{ignore_} {} bool operator==(const SingleBranch& b) const { return std::tie(condition, address, kill, is_sync, is_brk, ignore) == @@ -65,15 +65,15 @@ public: }; struct CaseBranch { - CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} u32 cmp_value; u32 address; }; class MultiBranch { public: - MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) - : gpr{gpr}, branches{std::move(branches)} {} + explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) + : gpr{gpr_}, branches{std::move(branches_)} {} u32 gpr{}; std::vector<CaseBranch> 
branches{}; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index eeac328a6..6576d1208 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -25,7 +25,7 @@ using Tegra::Shader::OpCode; namespace { void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list<Sampler>& used_samplers) { + const std::list<SamplerEntry>& used_samplers) { if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } @@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, } } -std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, +std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, VideoCore::GuestDriverProfile& gpu_driver, - const std::list<Sampler>& used_samplers) { + const std::list<SamplerEntry>& used_samplers) { const u32 base_offset = sampler_to_deduce.offset; u32 max_offset{std::numeric_limits<u32>::max()}; for (const auto& sampler : used_samplers) { @@ -66,7 +66,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, class ASTDecoder { public: - ASTDecoder(ShaderIR& ir) : ir(ir) {} + explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} void operator()(ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); @@ -153,8 +153,8 @@ void ShaderIR::Decode() { const auto& blocks = shader_info.blocks; NodeBlock current_block; u32 current_label = static_cast<u32>(exit_branch); - for (auto& block : blocks) { - if (shader_info.labels.count(block.start) != 0) { + for (const auto& block : blocks) { + if (shader_info.labels.contains(block.start)) { insert_block(current_block, current_label); current_block.clear(); current_label = block.start; diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index afef5948d..15eb700e7 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ 
-110,8 +110,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case SubOp::Sqrt: return Operation(OperationCode::FSqrt, PRECISE, op_a); default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", - static_cast<unsigned>(instr.sub_op.Value())); + UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); return Immediate(0); } }(); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 73155966f..7b5bb7003 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -83,7 +83,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case IAdd3Height::UpperHalfWord: return BitfieldExtract(value, 16, 16); default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height)); + UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); return Immediate(0); } }; @@ -258,7 +258,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { + auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), @@ -294,8 +294,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); - Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c)); - value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value)); + Node value = + Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); + value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); SetRegister(bb, instr.gpr0, std::move(value)); break; diff --git 
a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 2a30aab2b..73580277a 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -72,7 +72,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation case LogicOperation::PassB: return op_b; default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); + UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); return Immediate(0); } }(); @@ -92,8 +92,7 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation break; } default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", - static_cast<u32>(predicate_mode)); + UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); } } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b9989c88c..fea7a54df 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -244,7 +244,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Operation(OperationCode::FTrunc, value); default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - static_cast<u32>(instr.conversion.f2f.rounding.Value())); + instr.conversion.f2f.rounding.Value()); return value; } }(); @@ -300,7 +300,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Operation(OperationCode::FTrunc, PRECISE, value); default: UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - static_cast<u32>(instr.conversion.f2i.rounding.Value())); + instr.conversion.f2i.rounding.Value()); return Immediate(0); } }(); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index b2e88fa20..fa83108cd 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ 
b/src/video_core/shader/decode/half_set.cpp @@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - PredCondition cond; - bool bf; - bool ftz; - bool neg_a; - bool abs_a; - bool neg_b; - bool abs_b; + PredCondition cond{}; + bool bf = false; + bool ftz = false; + bool neg_a = false; + bool abs_a = false; + bool neg_b = false; + bool abs_b = false; switch (opcode->get().GetId()) { case OpCode::Id::HSET2_C: case OpCode::Id::HSET2_IMM: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 1ed4212ee..5470e8cf4 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -358,9 +358,9 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { instr.suldst.GetStoreDataLayout() != StoreType::Bits64); auto descriptor = [this, instr] { - std::optional<Tegra::Engines::SamplerDescriptor> descriptor; + std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; if (instr.suldst.is_immediate) { - descriptor = + sampler_descriptor = registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); @@ -368,12 +368,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { static_cast<s64>(global_code.size())); const auto buffer = std::get<1>(result); const auto offset = std::get<2>(result); - descriptor = registry.ObtainBindlessSampler(buffer, offset); + sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); } - if (!descriptor) { + if (!sampler_descriptor) { UNREACHABLE_MSG("Failed to obtain image descriptor"); } - return *descriptor; + return *sampler_descriptor; }(); const auto comp_mask = GetImageComponentMask(descriptor.format); @@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return pc; } -Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { 
+ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { const auto offset = static_cast<u32>(image.index.Value()); - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [offset](const Image& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [offset](const ImageEntry& entry) { return entry.offset == offset; }); if (it != std::end(used_images)) { ASSERT(!it->is_bindless && it->type == type); return *it; @@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t return used_images.emplace_back(next_index, offset, type); } -Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); @@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im const auto offset = std::get<2>(result); const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const Image& entry) { + [buffer, offset](const ImageEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != std::end(used_images)) { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e2bba88dd..50f4e7d35 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -47,7 +47,7 @@ OperationCode GetAtomOperation(AtomicOp op) { case AtomicOp::Exch: return OperationCode::AtomicIExchange; default: - UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + UNIMPLEMENTED_MSG("op={}", op); return OperationCode::AtomicIAdd; } } @@ -83,7 +83,7 @@ u32 GetMemorySize(Tegra::Shader::UniformType 
uniform_type) { case Tegra::Shader::UniformType::UnsignedQuad: return 128; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); return 32; } } @@ -175,12 +175,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } default: - UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); + UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); } break; } case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); [[fallthrough]]; case OpCode::Id::LD_S: { const auto GetAddress = [&](s32 offset) { @@ -224,7 +224,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } default: UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - static_cast<u32>(instr.ldst_sl.type.Value())); + instr.ldst_sl.type.Value()); } break; } @@ -306,8 +306,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", - static_cast<u64>(instr.st_l.cache_management.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); [[fallthrough]]; case OpCode::Id::ST_S: { const auto GetAddress = [&](s32 offset) { @@ -340,7 +339,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - static_cast<u32>(instr.ldst_sl.type.Value())); + instr.ldst_sl.type.Value()); } break; } @@ -387,7 +386,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } case OpCode::Id::RED: { UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - static_cast<int>(instr.red.type.Value())); + instr.red.type.Value()); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if 
(!real_address || !base_address) { @@ -403,12 +402,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", static_cast<int>(instr.atom.operation.Value())); + "operation={}", instr.atom.operation.Value()); UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || instr.atom.type == GlobalAtomicType::U64 || instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", static_cast<int>(instr.atom.type.Value())); + "type={}", instr.atom.type.Value()); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); @@ -428,10 +427,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || instr.atoms.operation == AtomicOp::Dec, - "operation={}", static_cast<int>(instr.atoms.operation.Value())); + "operation={}", instr.atoms.operation.Value()); UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || instr.atoms.type == AtomicType::U64, - "type={}", static_cast<int>(instr.atoms.type.Value())); + "type={}", instr.atoms.type.Value()); const bool is_signed = instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 29a7cfbfe..d3ea07aac 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -34,14 +34,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::EXIT: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", - static_cast<u32>(cc)); + const ConditionCode cc = 
instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); switch (instr.flow.cond) { case Tegra::Shader::FlowCondition::Always: bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) { + if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { // If this is an unconditional exit then just end processing here, // otherwise we have to account for the possibility of the condition // not being met, so continue processing the next instruction. @@ -56,17 +55,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", - static_cast<u32>(instr.flow.cond.Value())); + UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); } break; } case OpCode::Id::KIL: { UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}", - static_cast<u32>(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); bb.push_back(Operation(OperationCode::Discard)); break; @@ -90,11 +87,11 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); return Immediate(0U); case SystemVariable::Tid: { - Node value = Immediate(0); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9); - value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return value; + Node val = Immediate(0); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 
16, 9); + val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); + return val; } case SystemVariable::TidX: return Operation(OperationCode::LocalInvocationIdX); @@ -130,8 +127,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return Immediate(0u); } default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", - static_cast<u32>(instr.sys20.Value())); + UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); return Immediate(0u); } }(); @@ -181,8 +177,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } const Node branch = Operation(OperationCode::BranchIndirect, operand); - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { + const ConditionCode cc = instr.flow_condition_code; + if (cc != ConditionCode::T) { bb.push_back(Conditional(GetConditionCode(cc), {branch})); } else { bb.push_back(branch); @@ -218,9 +214,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SYNC: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", - static_cast<u32>(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); if (decompiled) { break; @@ -231,9 +226,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::BRK: { - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", - static_cast<u32>(cc)); + const ConditionCode cc = instr.flow_condition_code; + UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); if (decompiled) { break; } @@ -306,7 +300,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case Tegra::Shader::MembarType::GL: return OperationCode::MemoryBarrierGlobal; default: - 
UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); + UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); return OperationCode::MemoryBarrierGlobal; } }(); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index d4ffa8014..a53819c15 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -125,7 +125,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { case OpCode::Id::SHF_LEFT_IMM: { UNIMPLEMENTED_IF(instr.generates_cc); UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - static_cast<int>(instr.shf.xmode.Value())); + instr.shf.xmode.Value()); if (instr.is_b_imm) { op_b = Immediate(static_cast<u32>(instr.shf.immediate)); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 02fdccd86..833fa2a39 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -34,7 +34,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { case TextureType::TextureCube: return 3; default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); + UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); return 0; } } @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.is_shadow = is_depth_compare; - const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); + const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { SamplerInfo info; info.type = texture_type; info.is_array = is_array; - const std::optional<Sampler> sampler = is_bindless - ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional<SamplerEntry> sampler = + is_bindless ? 
GetBindlessSampler(base_reg, info, index_var) + : GetSampler(instr.sampler, info); Node4 values; if (!sampler) { std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); @@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { [[fallthrough]]; case OpCode::Id::TXQ: { Node index_var; - const std::optional<Sampler> sampler = is_bindless - ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); + const std::optional<SamplerEntry> sampler = + is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) + : GetSampler(instr.sampler, {}); if (!sampler) { u32 indexer = 0; @@ -255,8 +255,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", - static_cast<u32>(instr.txq.query_type.Value())); + UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); } break; } @@ -273,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { info.type = texture_type; info.is_array = is_array; Node index_var; - const std::optional<Sampler> sampler = + const std::optional<SamplerEntry> sampler = is_bindless ? 
GetBindlessSampler(instr.gpr20, info, index_var) : GetSampler(instr.sampler, info); @@ -302,7 +301,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case TextureType::TextureCube: return 3; default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type)); + UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); return 2; } }(); @@ -380,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( return info; } -std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { +std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, + SamplerInfo sampler_info) { const u32 offset = static_cast<u32>(sampler.index.Value()); const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const Sampler& entry) { return entry.offset == offset; }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [offset](const SamplerEntry& entry) { return entry.offset == offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -400,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, *info.is_shadow, *info.is_buffer, false); } -std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var) { +std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, + SamplerInfo info, Node& index_var) { const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); @@ -417,7 +417,7 @@ std::optional<Sampler> 
ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const Sampler& entry) { + [buffer, offset](const SamplerEntry& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -437,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); // Try to use an already created sampler if it exists - const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [indices, offsets](const SamplerEntry& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); if (it != used_samplers.end()) { ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); @@ -461,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // If this sampler has already been used, return the existing mapping. 
const auto it = std::find_if( used_samplers.begin(), used_samplers.end(), - [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); + [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); if (it != used_samplers.end()) { ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && @@ -566,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, info.is_buffer = false; Node index_var; - const std::optional<Sampler> sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); + const std::optional<SamplerEntry> sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) + : GetSampler(instr.sampler, info); if (!sampler) { return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; } @@ -595,7 +596,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = GetRegister(instr.gpr20.Value() + bias_offset); break; default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); + UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); break; } @@ -725,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de info.is_shadow = depth_compare; Node index_var; - const std::optional<Sampler> sampler = + const std::optional<SamplerEntry> sampler = is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) : GetSampler(instr.sampler, info); Node4 values; @@ -784,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? 
GetRegister(gpr20_cursor++) : nullptr}; - const std::optional<Sampler> sampler = GetSampler(instr.sampler, {}); + const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -801,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is info.type = texture_type; info.is_array = is_array; info.is_shadow = false; - const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); + const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index 11b77f795..37433d783 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -27,7 +27,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { case VoteOperation::Eq: return OperationCode::VoteEqual; default: - UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); + UNREACHABLE_MSG("Invalid vote operation={}", vote_op); return OperationCode::VoteAll; } } diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index 4e8264367..cda284c72 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h @@ -76,7 +76,7 @@ public: class ExprPredicate final { public: - explicit ExprPredicate(u32 predicate) : predicate{predicate} {} + explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} bool operator==(const ExprPredicate& b) const { return predicate == b.predicate; @@ -91,7 +91,7 @@ public: class ExprCondCode final { public: - explicit ExprCondCode(ConditionCode cc) : cc{cc} {} + explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} bool operator==(const ExprCondCode& b) const { return cc == b.cc; @@ -121,7 +121,7 @@ 
public: class ExprGprEqual final { public: - ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} bool operator==(const ExprGprEqual& b) const { return gpr == b.gpr && value == b.value; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a1e2c4d8e..b54d33763 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -282,26 +282,25 @@ struct SeparateSamplerNode; using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; using TrackSampler = std::shared_ptr<TrackSamplerData>; -struct Sampler { +struct SamplerEntry { /// Bound samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) + explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, + bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor - constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, - Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, - bool is_buffer_) + explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, + Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, + bool is_buffer_) : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array_}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor - constexpr explicit Sampler(u32 index_, u32 offset_, u32 
buffer_, - Tegra::Shader::TextureType type, bool is_array_, bool is_shadow_, - bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type}, is_array{is_array_}, + explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, + bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) + : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { } @@ -340,14 +339,14 @@ struct BindlessSamplerNode { u32 offset; }; -struct Image { +struct ImageEntry { public: /// Bound images constructor - constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, type{type_} {} /// Bindless samplers constructor - constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) + explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} void MarkWrite() { @@ -391,7 +390,7 @@ struct MetaArithmetic { /// Parameters describing a texture sampler struct MetaTexture { - Sampler sampler; + SamplerEntry sampler; Node array; Node depth_compare; std::vector<Node> aoffi; @@ -405,7 +404,7 @@ struct MetaTexture { }; struct MetaImage { - const Image& image; + const ImageEntry& image; std::vector<Node> values; u32 element{}; }; diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 7bf4ff387..6a5b6940d 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -107,7 +107,7 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); return {}; default: - 
UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); + UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); return {}; } } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 29d794b34..a4987ffc6 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -25,9 +25,10 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - Registry& registry) - : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { +ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, + Registry& registry_) + : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ + registry_} { Decode(); PostDecode(); } @@ -170,7 +171,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe // Default - do nothing return value; default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); + UNREACHABLE_MSG("Unimplemented conversion size: {}", size); return value; } } @@ -335,15 +336,15 @@ OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { return operation_table[index]; } -Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { +Node ShaderIR::GetConditionCode(ConditionCode cc) const { switch (cc) { - case Tegra::Shader::ConditionCode::NEU: + case ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); - case Tegra::Shader::ConditionCode::FCSM_TR: + case ConditionCode::FCSM_TR: UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); return MakeNode<PredicateNode>(Pred::NeverExecute, false); default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); + UNIMPLEMENTED_MSG("Unimplemented 
condition code: {}", cc); return MakeNode<PredicateNode>(Pred::NeverExecute, false); } } @@ -451,8 +452,8 @@ void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { } std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const std::size_t id = amend_code.size(); - amend_code.push_back(new_amend); + const auto id = amend_code.size(); + amend_code.push_back(std::move(new_amend)); return id; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a98b2104..0c6ab0f07 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -29,8 +29,8 @@ struct ShaderBlock; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset, bool is_indirect) - : max_offset{max_offset}, is_indirect{is_indirect} {} + constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) + : max_offset{max_offset_}, is_indirect{is_indirect_} {} constexpr ConstBuffer() = default; @@ -66,8 +66,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - Registry& registry); + explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, + CompilerSettings settings_, Registry& registry_); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -94,11 +94,11 @@ public: return used_cbufs; } - const std::list<Sampler>& GetSamplers() const { + const std::list<SamplerEntry>& GetSamplers() const { return used_samplers; } - const std::list<Image>& GetImages() const { + const std::list<ImageEntry>& GetImages() const { return used_images; } @@ -334,17 +334,17 @@ private: std::optional<Tegra::Engines::SamplerDescriptor> sampler); /// Accesses a texture sampler. 
- std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); + std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); /// Accesses a texture sampler for a bindless texture. - std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); + std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, + Node& index_var); /// Accesses an image. - Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); /// Access a bindless image sampler. - Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -454,8 +454,8 @@ private: std::set<Tegra::Shader::Attribute::Index> used_input_attributes; std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; - std::list<Sampler> used_samplers; - std::list<Image> used_images; + std::list<SamplerEntry> used_samplers; + std::list<ImageEntry> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1688267bb..6308aef94 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -28,7 +28,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t case Tegra::Texture::TextureType::Texture2DArray: return SurfaceTarget::Texture2DArray; default: - LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type)); + LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", 
texture_type); UNREACHABLE(); return SurfaceTarget::Texture2D; } @@ -47,7 +47,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { case SurfaceTarget::TextureCubeArray: return true; default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); UNREACHABLE(); return false; } @@ -66,7 +66,7 @@ bool SurfaceTargetIsArray(SurfaceTarget target) { case SurfaceTarget::TextureCubeArray: return true; default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target); UNREACHABLE(); return false; } @@ -85,7 +85,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: return PixelFormat::D32_FLOAT_S8_UINT; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::S8_UINT_D24_UNORM; } } @@ -183,7 +183,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) case Tegra::RenderTargetFormat::R8_UINT: return PixelFormat::R8_UINT; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::A8B8G8R8_UNORM; } } @@ -197,7 +197,7 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: return PixelFormat::B8G8R8A8_UNORM; default: - UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); + UNIMPLEMENTED_MSG("Unimplemented format={}", format); return PixelFormat::A8B8G8R8_UNORM; } } @@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) { } std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { - return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; + return 
{DefaultBlockWidth(format), DefaultBlockHeight(format)}; } } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index cfd12fa61..c40ab89d0 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -120,7 +120,7 @@ enum class PixelFormat { Max = MaxDepthStencilFormat, Invalid = 255, }; -static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); +constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); enum class SurfaceType { ColorTexture = 0, @@ -140,117 +140,7 @@ enum class SurfaceTarget { TextureCubeArray, }; -constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ - 0, // A8B8G8R8_UNORM - 0, // A8B8G8R8_SNORM - 0, // A8B8G8R8_SINT - 0, // A8B8G8R8_UINT - 0, // R5G6B5_UNORM - 0, // B5G6R5_UNORM - 0, // A1R5G5B5_UNORM - 0, // A2B10G10R10_UNORM - 0, // A2B10G10R10_UINT - 0, // A1B5G5R5_UNORM - 0, // R8_UNORM - 0, // R8_SNORM - 0, // R8_SINT - 0, // R8_UINT - 0, // R16G16B16A16_FLOAT - 0, // R16G16B16A16_UNORM - 0, // R16G16B16A16_SNORM - 0, // R16G16B16A16_SINT - 0, // R16G16B16A16_UINT - 0, // B10G11R11_FLOAT - 0, // R32G32B32A32_UINT - 2, // BC1_RGBA_UNORM - 2, // BC2_UNORM - 2, // BC3_UNORM - 2, // BC4_UNORM - 2, // BC4_SNORM - 2, // BC5_UNORM - 2, // BC5_SNORM - 2, // BC7_UNORM - 2, // BC6H_UFLOAT - 2, // BC6H_SFLOAT - 2, // ASTC_2D_4X4_UNORM - 0, // B8G8R8A8_UNORM - 0, // R32G32B32A32_FLOAT - 0, // R32G32B32A32_SINT - 0, // R32G32_FLOAT - 0, // R32G32_SINT - 0, // R32_FLOAT - 0, // R16_FLOAT - 0, // R16_UNORM - 0, // R16_SNORM - 0, // R16_UINT - 0, // R16_SINT - 0, // R16G16_UNORM - 0, // R16G16_FLOAT - 0, // R16G16_UINT - 0, // R16G16_SINT - 0, // R16G16_SNORM - 0, // R32G32B32_FLOAT - 0, // A8B8G8R8_SRGB - 0, // R8G8_UNORM - 0, // R8G8_SNORM - 0, // R8G8_SINT - 0, // R8G8_UINT - 0, // R32G32_UINT - 0, // R16G16B16X16_FLOAT - 0, // R32_UINT - 0, // R32_SINT - 2, // ASTC_2D_8X8_UNORM - 2, // ASTC_2D_8X5_UNORM - 2, // 
ASTC_2D_5X4_UNORM - 0, // B8G8R8A8_SRGB - 2, // BC1_RGBA_SRGB - 2, // BC2_SRGB - 2, // BC3_SRGB - 2, // BC7_SRGB - 0, // A4B4G4R4_UNORM - 2, // ASTC_2D_4X4_SRGB - 2, // ASTC_2D_8X8_SRGB - 2, // ASTC_2D_8X5_SRGB - 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5_UNORM - 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8_UNORM - 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6_UNORM - 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10_UNORM - 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12_UNORM - 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6_UNORM - 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5_UNORM - 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9_FLOAT - 0, // D32_FLOAT - 0, // D16_UNORM - 0, // D24_UNORM_S8_UINT - 0, // S8_UINT_D24_UNORM - 0, // D32_FLOAT_S8_UINT -}}; - -/** - * Gets the compression factor for the specified PixelFormat. This applies to just the - * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed - * texture formats. 
- */ -inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { - DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); - return compression_factor_shift_table[static_cast<std::size_t>(format)]; -} - -inline constexpr u32 GetCompressionFactor(PixelFormat format) { - return 1U << GetCompressionFactorShift(format); -} - -constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ +constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast<std::size_t>(format) < block_width_table.size()); - return block_width_table[static_cast<std::size_t>(format)]; +constexpr u32 DefaultBlockWidth(PixelFormat format) { + ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size()); + return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)]; } -constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ +constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ 1, // A8B8G8R8_UNORM 1, // A8B8G8R8_SNORM 1, // A8B8G8R8_SINT @@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 1, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast<std::size_t>(format) < block_height_table.size()); - return block_height_table[static_cast<std::size_t>(format)]; +constexpr u32 DefaultBlockHeight(PixelFormat format) { + ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size()); + return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)]; } -constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ +constexpr 
std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ 32, // A8B8G8R8_UNORM 32, // A8B8G8R8_SNORM 32, // A8B8G8R8_SINT @@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 64, // D32_FLOAT_S8_UINT }}; -static constexpr u32 GetFormatBpp(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; - - ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); - return bpp_table[static_cast<std::size_t>(format)]; +constexpr u32 BitsPerBlock(PixelFormat format) { + ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size()); + return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)]; } /// Returns the sizer in bytes of the specified pixel format -static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { - if (pixel_format == PixelFormat::Invalid) { - return 0; - } - return GetFormatBpp(pixel_format) / CHAR_BIT; +constexpr u32 BytesPerBlock(PixelFormat pixel_format) { + return BitsPerBlock(pixel_format) / CHAR_BIT; } SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <bit> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/accelerated_swizzle.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon::Accelerated { + +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using VideoCore::Surface::BytesPerBlock; + +BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + return BlockLinearSwizzle2DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), + .layer_stride = info.layer_stride, + .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + }; +} + +BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, + const ImageInfo& info) { + const Extent3D block = swizzle.block; + const Extent3D num_tiles = swizzle.num_tiles; + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); + const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; + + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> 
GOB_SIZE_X_SHIFT; + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); + const u32 slice_size = + Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; + return BlockLinearSwizzle3DParams{ + .origin{0, 0, 0}, + .destination{0, 0, 0}, + .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), + .slice_size = slice_size, + .block_size = block_size, + .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, + .block_height = block.height, + .block_height_mask = (1U << block.height) - 1, + .block_depth = block.depth, + .block_depth_mask = (1U << block.depth) - 1, + }; +} + +} // namespace VideoCommon::Accelerated
\ No newline at end of file diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h @@ -0,0 +1,45 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/common_types.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon::Accelerated { + +struct BlockLinearSwizzle2DParams { + std::array<u32, 3> origin; + std::array<s32, 3> destination; + u32 bytes_per_block_log2; + u32 layer_stride; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; +}; + +struct BlockLinearSwizzle3DParams { + std::array<u32, 3> origin; + std::array<s32, 3> destination; + u32 bytes_per_block_log2; + u32 slice_size; + u32 block_size; + u32 x_shift; + u32 block_height; + u32 block_height_mask; + u32 block_depth; + u32 block_depth_mask; +}; + +[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( + const SwizzleParameters& swizzle, const ImageInfo& info); + +} // namespace VideoCommon::Accelerated diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 9c21a0649..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCommon { - -struct CopyParams { - constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, - u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, - u32 depth) - : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, - dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, - dest_level{dest_level}, width{width}, height{height}, depth{depth} {} - - constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) - : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, - dest_level{level}, width{width}, height{height}, depth{depth} {} - - u32 source_x; - u32 source_y; - u32 source_z; - u32 dest_x; - u32 dest_y; - u32 dest_z; - u32 source_level; - u32 dest_level; - u32 width; - u32 height; - u32 depth; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp @@ -0,0 +1,97 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <span>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+/// Decodes a single texel from one unsigned BC4 (RGTC1) block.
+///
+/// @param bits 64-bit block value. Bits 0-7 are read as red0, bits 8-15 as red1, and bits 16-63
+///             as sixteen 3-bit selector codes in row-major texel order.
+/// @param x    Texel column inside the block (0-3).
+/// @param y    Texel row inside the block (0-3).
+/// @return Decoded red value in the range 0-255.
+///
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
+[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
+    const u32 code_offset = 16 + 3 * (4 * y + x);
+    // Explicit casts: the masked values always fit, this only silences narrowing warnings.
+    const u32 code = static_cast<u32>((bits >> code_offset) & 7);
+    const u32 red0 = static_cast<u32>((bits >> 0) & 0xff);
+    const u32 red1 = static_cast<u32>((bits >> 8) & 0xff);
+    if (red0 > red1) {
+        // Two endpoints plus six values interpolated in sevenths.
+        switch (code) {
+        case 0:
+            return red0;
+        case 1:
+            return red1;
+        case 2:
+            return (6 * red0 + 1 * red1) / 7;
+        case 3:
+            return (5 * red0 + 2 * red1) / 7;
+        case 4:
+            return (4 * red0 + 3 * red1) / 7;
+        case 5:
+            return (3 * red0 + 4 * red1) / 7;
+        case 6:
+            return (2 * red0 + 5 * red1) / 7;
+        case 7:
+            return (1 * red0 + 6 * red1) / 7;
+        }
+    } else {
+        // Two endpoints, four values interpolated in fifths, plus the constants 0 and 255.
+        switch (code) {
+        case 0:
+            return red0;
+        case 1:
+            return red1;
+        case 2:
+            return (4 * red0 + 1 * red1) / 5;
+        case 3:
+            return (3 * red0 + 2 * red1) / 5;
+        case 4:
+            return (2 * red0 + 3 * red1) / 5;
+        case 5:
+            return (1 * red0 + 4 * red1) / 5;
+        case 6:
+            return 0;
+        case 7:
+            return 0xff;
+        }
+    }
+    return 0;
+}
+
+/// Decompresses BC4 data into a tightly packed image with four bytes per texel.
+///
+/// Every texel is written as {red, 0, 0, 0xff}. Blocks are consumed sequentially from `input`,
+/// eight bytes each, ordered by slice, then block row, then block column.
+///
+/// @param input  Compressed blocks; needs 8 bytes for every 4x4 block that is visited.
+/// @param extent Image size in texels. Width and height that are not multiples of 4 are
+///               reported through UNIMPLEMENTED_IF_MSG; the loops only visit whole blocks.
+/// @param output Destination buffer; needs width * height * depth * 4 bytes.
+///               Neither span is bounds-checked here.
+void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
+    UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
+    UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
+    static constexpr u32 BLOCK_SIZE = 4;
+    static constexpr size_t BYTES_PER_TEXEL = 4;
+    const u32 num_blocks_x = extent.width / BLOCK_SIZE;
+    const u32 num_blocks_y = extent.height / BLOCK_SIZE;
+    size_t input_offset = 0;
+    for (u32 slice = 0; slice < extent.depth; ++slice) {
+        for (u32 block_y = 0; block_y < num_blocks_y; ++block_y) {
+            for (u32 block_x = 0; block_x < num_blocks_x; ++block_x) {
+                u64 bits;
+                std::memcpy(&bits, &input[input_offset], sizeof(bits));
+                input_offset += sizeof(bits);
+
+                for (u32 y = 0; y < BLOCK_SIZE; ++y) {
+                    for (u32 x = 0; x < BLOCK_SIZE; ++x) {
+                        // Offsets are computed in size_t. The previous u32 arithmetic widened
+                        // the sum with "* 4ULL" and then truncated it back to 32 bits.
+                        const size_t linear_z = slice;
+                        const size_t linear_y = static_cast<size_t>(block_y) * BLOCK_SIZE + y;
+                        const size_t linear_x = static_cast<size_t>(block_x) * BLOCK_SIZE + x;
+                        const size_t offset_z = linear_z * extent.width * extent.height;
+                        const size_t offset_y = linear_y * extent.width;
+                        const size_t offset_x = linear_x;
+                        const size_t output_offset =
+                            (offset_z + offset_y + offset_x) * BYTES_PER_TEXEL;
+                        const u32 color = DecompressBlock(bits, x, y);
+                        output[output_offset + 0] = static_cast<u8>(color);
+                        output[output_offset + 1] = 0;
+                        output[output_offset + 2] = 0;
+                        output[output_offset + 3] = 0xff;
+                    }
+                }
+            }
+        }
+    }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon {
+
+void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "common/logging/log.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+/// Cached view of a table of descriptors that lives in GPU memory.
+///
+/// The table is identified by a base address and a limit (the highest valid index). Each entry
+/// is re-read from GPU memory on every Read() and compared with the last value seen, so callers
+/// learn whether the descriptor changed since the previous read.
+template <typename Descriptor>
+class DescriptorTable {
+public:
+    explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {
+        // Size the storage for the default (address 0, limit 0) state. Without this, a first
+        // Synchornize(0, 0) call matches the defaults, skips Refresh(), and leaves Read()
+        // indexing empty vectors.
+        Refresh(current_gpu_addr, current_limit);
+    }
+
+    /// Points the table at a new base address / limit.
+    /// The misspelled name is kept as-is because it is the public interface.
+    /// @return true when the address or limit changed and the cache was reset, false otherwise.
+    [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) {
+        // The attribute belongs on the taken branch; placed before "if" it annotates the whole
+        // statement instead of the condition outcome.
+        if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] {
+            return false;
+        }
+        Refresh(gpu_addr, limit);
+        return true;
+    }
+
+    /// Forgets which descriptors were read, so the next Read() of each index reports a change.
+    void Invalidate() noexcept {
+        std::ranges::fill(read_descriptors, 0);
+    }
+
+    /// Reads descriptor `index` from GPU memory.
+    /// @return The descriptor and a flag that is true when this is the first read of the index
+    ///         since the last reset, or when the value differs from the cached one.
+    [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
+        DEBUG_ASSERT(index <= current_limit);
+        const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
+        std::pair<Descriptor, bool> result;
+        gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
+        if (IsDescriptorRead(index)) {
+            result.second = result.first != descriptors[index];
+        } else {
+            MarkDescriptorAsRead(index);
+            result.second = true;
+        }
+        if (result.second) {
+            descriptors[index] = result.first;
+        }
+        return result;
+    }
+
+    /// Highest valid descriptor index.
+    [[nodiscard]] u32 Limit() const noexcept {
+        return current_limit;
+    }
+
+private:
+    /// Stores the new address / limit, clears the read bitmap and resizes the cache to
+    /// limit + 1 entries.
+    void Refresh(GPUVAddr gpu_addr, u32 limit) {
+        current_gpu_addr = gpu_addr;
+        current_limit = limit;
+
+        const size_t num_descriptors = static_cast<size_t>(limit) + 1;
+        read_descriptors.clear();
+        read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
+        descriptors.resize(num_descriptors);
+    }
+
+    void MarkDescriptorAsRead(u32 index) noexcept {
+        read_descriptors[index / 64] |= 1ULL << (index % 64);
+    }
+
+    [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
+        return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
+    }
+
+    Tegra::MemoryManager& gpu_memory;
+    GPUVAddr current_gpu_addr{};
+    u32 current_limit{};
+    // One bit per descriptor, packed 64 per word; set once the index has been read.
+    std::vector<u64> read_descriptors;
+    // Last value seen for each descriptor index.
+    std::vector<Descriptor> descriptors;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7d5a75648..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <array>
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
 constexpr auto SINT = ComponentType::SINT;
 constexpr auto UINT = ComponentType::UINT;
 constexpr auto FLOAT = ComponentType::FLOAT;
-constexpr bool C = false; // Normal color
-constexpr bool S = true; // Srgb
-
-struct Table {
-    constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component,
-                    ComponentType green_component, ComponentType blue_component,
-                    ComponentType alpha_component, PixelFormat pixel_format)
-        : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component},
-          green_component{green_component}, blue_component{blue_component},
-          alpha_component{alpha_component}, is_srgb{is_srgb} {}
-
-    TextureFormat texture_format;
-    PixelFormat pixel_format;
-    ComponentType red_component;
-    ComponentType green_component;
-    ComponentType blue_component;
-    ComponentType alpha_component;
-    bool is_srgb;
-};
-constexpr std::array<Table, 86> DefinitionTable = {{
-    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
-    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
-    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
-    {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
-    {TextureFormat::A8R8G8B8, S, UNORM, 
UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, - {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, - {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, - {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - - {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, - {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, - {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, - {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - - {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, - {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, - {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, - {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, - {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - - {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, - {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, - {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, - {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, - {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, 
PixelFormat::R16G16_SINT}, - - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, - {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - - {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - - {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, - {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, - {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - - {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - - {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, - {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, - {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - - {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - - {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - - {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, - {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, - {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, - {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - - {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, - {TextureFormat::BC1_RGBA, S, 
UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - - {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, - {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - - {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, - {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - - {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, - {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - - {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, - {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - - {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, - {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - - {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, - {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, - {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, - {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, - {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, - {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, - {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - - 
{TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, - {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, - {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, - {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, - {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, - {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, +constexpr bool LINEAR = false; +constexpr bool SRGB = true; + +constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, + ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { + u32 hash = is_srgb ? 
1 : 0; + hash |= static_cast<u32>(red_component) << 1; + hash |= static_cast<u32>(green_component) << 4; + hash |= static_cast<u32>(blue_component) << 7; + hash |= static_cast<u32>(alpha_component) << 10; + hash |= static_cast<u32>(format) << 13; + return hash; +} - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, - {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, -}}; +constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { + return Hash(format, component, component, component, component, is_srgb); +} } // Anonymous namespace -FormatLookupTable::FormatLookupTable() { - table.fill(static_cast<u8>(PixelFormat::Invalid)); - - for (const auto& entry : DefinitionTable) { - table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, - entry.green_component, entry.blue_component, entry.alpha_component)] = - static_cast<u8>(entry.pixel_format); - } -} - -PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) const noexcept { - const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( - format, is_srgb, red_component, green_component, blue_component, alpha_component)]); - // [[likely]] - if (pixel_format != PixelFormat::Invalid) { - return pixel_format; +PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, + ComponentType blue, ComponentType alpha, + bool is_srgb) noexcept { + switch (Hash(format, red, green, blue, alpha, is_srgb)) { + case Hash(TextureFormat::A8R8G8B8, UNORM): + return PixelFormat::A8B8G8R8_UNORM; + case Hash(TextureFormat::A8R8G8B8, SNORM): + return PixelFormat::A8B8G8R8_SNORM; + case Hash(TextureFormat::A8R8G8B8, UINT): + return PixelFormat::A8B8G8R8_UINT; + case Hash(TextureFormat::A8R8G8B8, 
SINT): + return PixelFormat::A8B8G8R8_SINT; + case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): + return PixelFormat::A8B8G8R8_SRGB; + case Hash(TextureFormat::B5G6R5, UNORM): + return PixelFormat::B5G6R5_UNORM; + case Hash(TextureFormat::A2B10G10R10, UNORM): + return PixelFormat::A2B10G10R10_UNORM; + case Hash(TextureFormat::A2B10G10R10, UINT): + return PixelFormat::A2B10G10R10_UINT; + case Hash(TextureFormat::A1B5G5R5, UNORM): + return PixelFormat::A1B5G5R5_UNORM; + case Hash(TextureFormat::A4B4G4R4, UNORM): + return PixelFormat::A4B4G4R4_UNORM; + case Hash(TextureFormat::R8, UNORM): + return PixelFormat::R8_UNORM; + case Hash(TextureFormat::R8, SNORM): + return PixelFormat::R8_SNORM; + case Hash(TextureFormat::R8, UINT): + return PixelFormat::R8_UINT; + case Hash(TextureFormat::R8, SINT): + return PixelFormat::R8_SINT; + case Hash(TextureFormat::R8G8, UNORM): + return PixelFormat::R8G8_UNORM; + case Hash(TextureFormat::R8G8, SNORM): + return PixelFormat::R8G8_SNORM; + case Hash(TextureFormat::R8G8, UINT): + return PixelFormat::R8G8_UINT; + case Hash(TextureFormat::R8G8, SINT): + return PixelFormat::R8G8_SINT; + case Hash(TextureFormat::R16G16B16A16, FLOAT): + return PixelFormat::R16G16B16A16_FLOAT; + case Hash(TextureFormat::R16G16B16A16, UNORM): + return PixelFormat::R16G16B16A16_UNORM; + case Hash(TextureFormat::R16G16B16A16, SNORM): + return PixelFormat::R16G16B16A16_SNORM; + case Hash(TextureFormat::R16G16B16A16, UINT): + return PixelFormat::R16G16B16A16_UINT; + case Hash(TextureFormat::R16G16B16A16, SINT): + return PixelFormat::R16G16B16A16_SINT; + case Hash(TextureFormat::R16G16, FLOAT): + return PixelFormat::R16G16_FLOAT; + case Hash(TextureFormat::R16G16, UNORM): + return PixelFormat::R16G16_UNORM; + case Hash(TextureFormat::R16G16, SNORM): + return PixelFormat::R16G16_SNORM; + case Hash(TextureFormat::R16G16, UINT): + return PixelFormat::R16G16_UINT; + case Hash(TextureFormat::R16G16, SINT): + return PixelFormat::R16G16_SINT; + case 
Hash(TextureFormat::R16, FLOAT): + return PixelFormat::R16_FLOAT; + case Hash(TextureFormat::R16, UNORM): + return PixelFormat::R16_UNORM; + case Hash(TextureFormat::R16, SNORM): + return PixelFormat::R16_SNORM; + case Hash(TextureFormat::R16, UINT): + return PixelFormat::R16_UINT; + case Hash(TextureFormat::R16, SINT): + return PixelFormat::R16_SINT; + case Hash(TextureFormat::B10G11R11, FLOAT): + return PixelFormat::B10G11R11_FLOAT; + case Hash(TextureFormat::R32G32B32A32, FLOAT): + return PixelFormat::R32G32B32A32_FLOAT; + case Hash(TextureFormat::R32G32B32A32, UINT): + return PixelFormat::R32G32B32A32_UINT; + case Hash(TextureFormat::R32G32B32A32, SINT): + return PixelFormat::R32G32B32A32_SINT; + case Hash(TextureFormat::R32G32B32, FLOAT): + return PixelFormat::R32G32B32_FLOAT; + case Hash(TextureFormat::R32G32, FLOAT): + return PixelFormat::R32G32_FLOAT; + case Hash(TextureFormat::R32G32, UINT): + return PixelFormat::R32G32_UINT; + case Hash(TextureFormat::R32G32, SINT): + return PixelFormat::R32G32_SINT; + case Hash(TextureFormat::R32, FLOAT): + return PixelFormat::R32_FLOAT; + case Hash(TextureFormat::R32, UINT): + return PixelFormat::R32_UINT; + case Hash(TextureFormat::R32, SINT): + return PixelFormat::R32_SINT; + case Hash(TextureFormat::E5B9G9R9, FLOAT): + return PixelFormat::E5B9G9R9_FLOAT; + case Hash(TextureFormat::D32, FLOAT): + return PixelFormat::D32_FLOAT; + case Hash(TextureFormat::D16, UNORM): + return PixelFormat::D16_UNORM; + case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): + return PixelFormat::S8_UINT_D24_UNORM; + case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): + return PixelFormat::D32_FLOAT_S8_UINT; + case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): + return PixelFormat::BC1_RGBA_UNORM; + case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): + return PixelFormat::BC1_RGBA_SRGB; + case 
Hash(TextureFormat::BC2, UNORM, LINEAR): + return PixelFormat::BC2_UNORM; + case Hash(TextureFormat::BC2, UNORM, SRGB): + return PixelFormat::BC2_SRGB; + case Hash(TextureFormat::BC3, UNORM, LINEAR): + return PixelFormat::BC3_UNORM; + case Hash(TextureFormat::BC3, UNORM, SRGB): + return PixelFormat::BC3_SRGB; + case Hash(TextureFormat::BC4, UNORM): + return PixelFormat::BC4_UNORM; + case Hash(TextureFormat::BC4, SNORM): + return PixelFormat::BC4_SNORM; + case Hash(TextureFormat::BC5, UNORM): + return PixelFormat::BC5_UNORM; + case Hash(TextureFormat::BC5, SNORM): + return PixelFormat::BC5_SNORM; + case Hash(TextureFormat::BC7, UNORM, LINEAR): + return PixelFormat::BC7_UNORM; + case Hash(TextureFormat::BC7, UNORM, SRGB): + return PixelFormat::BC7_SRGB; + case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): + return PixelFormat::BC6H_SFLOAT; + case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): + return PixelFormat::BC6H_UFLOAT; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_4X4_UNORM; + case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_4X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X4_UNORM; + case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X4_SRGB; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_5X5_UNORM; + case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_5X5_SRGB; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X8_UNORM; + case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X8_SRGB; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X5_UNORM; + case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X5_SRGB; + case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X8_UNORM; + case 
Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): + return PixelFormat::ASTC_2D_10X8_SRGB; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X6_UNORM; + case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X6_SRGB; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): + return PixelFormat::ASTC_2D_10X10_UNORM; + case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): + return PixelFormat::ASTC_2D_10X10_SRGB; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): + return PixelFormat::ASTC_2D_12X12_UNORM; + case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): + return PixelFormat::ASTC_2D_12X12_SRGB; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): + return PixelFormat::ASTC_2D_8X6_UNORM; + case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_8X6_SRGB; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): + return PixelFormat::ASTC_2D_6X5_UNORM; + case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): + return PixelFormat::ASTC_2D_6X5_SRGB; } UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", - static_cast<int>(format), is_srgb, static_cast<int>(red_component), - static_cast<int>(green_component), static_cast<int>(blue_component), - static_cast<int>(alpha_component)); + static_cast<int>(format), is_srgb, static_cast<int>(red), + static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha)); return PixelFormat::A8B8G8R8_UNORM; } -void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, - ComponentType green_component, ComponentType blue_component, - ComponentType alpha_component, PixelFormat pixel_format) {} - -std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, - ComponentType red_component, - ComponentType green_component, - ComponentType blue_component, - ComponentType alpha_component) noexcept { - const auto format_index = static_cast<std::size_t>(format); - 
const auto red_index = static_cast<std::size_t>(red_component); - const auto green_index = static_cast<std::size_t>(green_component); - const auto blue_index = static_cast<std::size_t>(blue_component); - const auto alpha_index = static_cast<std::size_t>(alpha_component); - const std::size_t srgb_index = is_srgb ? 1 : 0; - - return format_index * PerFormat + - srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + - alpha_index * PerComponent * PerComponent * PerComponent + - blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h @@ -4,48 +4,14 @@ #pragma once -#include <array> -#include <limits> #include "video_core/surface.h" #include "video_core/textures/texture.h" namespace VideoCommon { -class FormatLookupTable { -public: - explicit FormatLookupTable(); - - VideoCore::Surface::PixelFormat GetPixelFormat( - Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component) const noexcept; - -private: - static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); - - static constexpr std::size_t NumTextureFormats = 128; - - static constexpr std::size_t PerComponent = 8; - static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; - static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; - static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; - static constexpr std::size_t PerFormat = PerComponents4 * 2; - - static std::size_t CalculateIndex(Tegra::Texture::TextureFormat 
format, bool is_srgb, - Tegra::Texture::ComponentType red_component, - Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component) noexcept; - - void Set(Tegra::Texture::TextureFormat format, bool is_srgb, - Tegra::Texture::ComponentType red_component, - Tegra::Texture::ComponentType green_component, - Tegra::Texture::ComponentType blue_component, - Tegra::Texture::ComponentType alpha_component, - VideoCore::Surface::PixelFormat pixel_format); - - std::array<u8, NumTextureFormats * PerFormat> table; -}; +VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( + Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, + Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp @@ -0,0 +1,95 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <algorithm>
+#include <optional>
+#include <string>
+#include <string_view>
+
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_base.h"
+#include "video_core/texture_cache/render_targets.h"
+
+namespace VideoCommon {
+
+/// Builds a human-readable label for an image: type, GPU address and dimensions, followed by
+/// ":L<layers>" and ":M<levels>" suffixes when there is more than one layer or level.
+std::string Name(const ImageBase& image) {
+    const GPUVAddr gpu_addr = image.gpu_addr;
+    const ImageInfo& info = image.info;
+    const u32 width = info.size.width;
+    const u32 height = info.size.height;
+    const u32 depth = info.size.depth;
+    const u32 num_layers = info.resources.layers;
+    const u32 num_levels = info.resources.levels;
+    std::string resource;
+    if (num_layers > 1) {
+        resource += fmt::format(":L{}", num_layers);
+    }
+    if (num_levels > 1) {
+        resource += fmt::format(":M{}", num_levels);
+    }
+    switch (info.type) {
+    case ImageType::e1D:
+        return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
+    case ImageType::e2D:
+        return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
+    case ImageType::e3D:
+        // This label used to read "Image 2D", making 3D images indistinguishable from 2D ones.
+        return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
+    case ImageType::Linear:
+        return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
+    case ImageType::Buffer:
+        return fmt::format("Buffer 0x{:x} {}", gpu_addr, width);
+    }
+    return "Invalid";
+}
+
+/// Builds a human-readable label for an image view.
+/// @param type Optional override; when empty the view's own type is used.
+std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+    const u32 width = image_view.size.width;
+    const u32 height = image_view.size.height;
+    const u32 depth = image_view.size.depth;
+    const u32 num_levels = image_view.range.extent.levels;
+    const u32 num_layers = image_view.range.extent.layers;
+
+    // The level count is only shown when the view spans more than one level.
+    const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
+    switch (type.value_or(image_view.type)) {
+    case ImageViewType::e1D:
+        return fmt::format("ImageView 1D {}{}", width, level);
+    case ImageViewType::e2D:
+        return fmt::format("ImageView 2D {}x{}{}", width, height, level);
+    case ImageViewType::Cube:
+        return fmt::format("ImageView Cube {}x{}{}", width, height, level);
+    case ImageViewType::e3D:
+        return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
+    case ImageViewType::e1DArray:
+        return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
+    case ImageViewType::e2DArray:
+        return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
+    case ImageViewType::CubeArray:
+        return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
+    case ImageViewType::Rect:
+        return fmt::format("ImageView Rect {}x{}{}", width, height, level);
+    case ImageViewType::Buffer:
+        return fmt::format("BufferView {}", width);
+    }
+    return "Invalid";
+}
+
+/// Builds a human-readable label for a set of render targets.
+/// The prefix is "R" for color plus depth, "Z" for depth only, "C" for color only and "X" when
+/// nothing is bound; the number of bound color buffers follows the prefix when it is non-zero.
+std::string Name(const RenderTargets& render_targets) {
+    std::string_view debug_prefix;
+    const auto num_color = std::ranges::count_if(
+        render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
+    if (render_targets.depth_buffer_id) {
+        debug_prefix = num_color > 0 ? "R" : "Z";
+    } else {
+        debug_prefix = num_color > 0 ? "C" : "X";
+    }
+    const Extent2D size = render_targets.size;
+    if (num_color > 0) {
+        return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
+                           size.height);
+    } else {
+        return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
+    }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+
+template <>
+struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
+    template <typename FormatContext>
+    auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
+        using VideoCore::Surface::PixelFormat;
+        const string_view name = [format] {
+            switch (format) {
+            case PixelFormat::A8B8G8R8_UNORM:
+                return "A8B8G8R8_UNORM";
+            case PixelFormat::A8B8G8R8_SNORM:
+                return "A8B8G8R8_SNORM";
+            case PixelFormat::A8B8G8R8_SINT:
+                return "A8B8G8R8_SINT";
+            case PixelFormat::A8B8G8R8_UINT:
+                return "A8B8G8R8_UINT";
+            case PixelFormat::R5G6B5_UNORM:
+                return "R5G6B5_UNORM";
+            case PixelFormat::B5G6R5_UNORM:
+                return "B5G6R5_UNORM";
+            case PixelFormat::A1R5G5B5_UNORM:
+                return "A1R5G5B5_UNORM";
+            case PixelFormat::A2B10G10R10_UNORM:
+                return "A2B10G10R10_UNORM";
+            case PixelFormat::A2B10G10R10_UINT:
+                return "A2B10G10R10_UINT";
+            case PixelFormat::A1B5G5R5_UNORM:
+                return "A1B5G5R5_UNORM";
+            case PixelFormat::R8_UNORM:
+                return "R8_UNORM";
+            case PixelFormat::R8_SNORM:
+                return "R8_SNORM";
+            case PixelFormat::R8_SINT:
+                return "R8_SINT";
+            case PixelFormat::R8_UINT:
+                return "R8_UINT";
+            case 
PixelFormat::R16G16B16A16_FLOAT: + return "R16G16B16A16_FLOAT"; + case PixelFormat::R16G16B16A16_UNORM: + return "R16G16B16A16_UNORM"; + case PixelFormat::R16G16B16A16_SNORM: + return "R16G16B16A16_SNORM"; + case PixelFormat::R16G16B16A16_SINT: + return "R16G16B16A16_SINT"; + case PixelFormat::R16G16B16A16_UINT: + return "R16G16B16A16_UINT"; + case PixelFormat::B10G11R11_FLOAT: + return "B10G11R11_FLOAT"; + case PixelFormat::R32G32B32A32_UINT: + return "R32G32B32A32_UINT"; + case PixelFormat::BC1_RGBA_UNORM: + return "BC1_RGBA_UNORM"; + case PixelFormat::BC2_UNORM: + return "BC2_UNORM"; + case PixelFormat::BC3_UNORM: + return "BC3_UNORM"; + case PixelFormat::BC4_UNORM: + return "BC4_UNORM"; + case PixelFormat::BC4_SNORM: + return "BC4_SNORM"; + case PixelFormat::BC5_UNORM: + return "BC5_UNORM"; + case PixelFormat::BC5_SNORM: + return "BC5_SNORM"; + case PixelFormat::BC7_UNORM: + return "BC7_UNORM"; + case PixelFormat::BC6H_UFLOAT: + return "BC6H_UFLOAT"; + case PixelFormat::BC6H_SFLOAT: + return "BC6H_SFLOAT"; + case PixelFormat::ASTC_2D_4X4_UNORM: + return "ASTC_2D_4X4_UNORM"; + case PixelFormat::B8G8R8A8_UNORM: + return "B8G8R8A8_UNORM"; + case PixelFormat::R32G32B32A32_FLOAT: + return "R32G32B32A32_FLOAT"; + case PixelFormat::R32G32B32A32_SINT: + return "R32G32B32A32_SINT"; + case PixelFormat::R32G32_FLOAT: + return "R32G32_FLOAT"; + case PixelFormat::R32G32_SINT: + return "R32G32_SINT"; + case PixelFormat::R32_FLOAT: + return "R32_FLOAT"; + case PixelFormat::R16_FLOAT: + return "R16_FLOAT"; + case PixelFormat::R16_UNORM: + return "R16_UNORM"; + case PixelFormat::R16_SNORM: + return "R16_SNORM"; + case PixelFormat::R16_UINT: + return "R16_UINT"; + case PixelFormat::R16_SINT: + return "R16_SINT"; + case PixelFormat::R16G16_UNORM: + return "R16G16_UNORM"; + case PixelFormat::R16G16_FLOAT: + return "R16G16_FLOAT"; + case PixelFormat::R16G16_UINT: + return "R16G16_UINT"; + case PixelFormat::R16G16_SINT: + return "R16G16_SINT"; + case PixelFormat::R16G16_SNORM: + 
return "R16G16_SNORM"; + case PixelFormat::R32G32B32_FLOAT: + return "R32G32B32_FLOAT"; + case PixelFormat::A8B8G8R8_SRGB: + return "A8B8G8R8_SRGB"; + case PixelFormat::R8G8_UNORM: + return "R8G8_UNORM"; + case PixelFormat::R8G8_SNORM: + return "R8G8_SNORM"; + case PixelFormat::R8G8_SINT: + return "R8G8_SINT"; + case PixelFormat::R8G8_UINT: + return "R8G8_UINT"; + case PixelFormat::R32G32_UINT: + return "R32G32_UINT"; + case PixelFormat::R16G16B16X16_FLOAT: + return "R16G16B16X16_FLOAT"; + case PixelFormat::R32_UINT: + return "R32_UINT"; + case PixelFormat::R32_SINT: + return "R32_SINT"; + case PixelFormat::ASTC_2D_8X8_UNORM: + return "ASTC_2D_8X8_UNORM"; + case PixelFormat::ASTC_2D_8X5_UNORM: + return "ASTC_2D_8X5_UNORM"; + case PixelFormat::ASTC_2D_5X4_UNORM: + return "ASTC_2D_5X4_UNORM"; + case PixelFormat::B8G8R8A8_SRGB: + return "B8G8R8A8_SRGB"; + case PixelFormat::BC1_RGBA_SRGB: + return "BC1_RGBA_SRGB"; + case PixelFormat::BC2_SRGB: + return "BC2_SRGB"; + case PixelFormat::BC3_SRGB: + return "BC3_SRGB"; + case PixelFormat::BC7_SRGB: + return "BC7_SRGB"; + case PixelFormat::A4B4G4R4_UNORM: + return "A4B4G4R4_UNORM"; + case PixelFormat::ASTC_2D_4X4_SRGB: + return "ASTC_2D_4X4_SRGB"; + case PixelFormat::ASTC_2D_8X8_SRGB: + return "ASTC_2D_8X8_SRGB"; + case PixelFormat::ASTC_2D_8X5_SRGB: + return "ASTC_2D_8X5_SRGB"; + case PixelFormat::ASTC_2D_5X4_SRGB: + return "ASTC_2D_5X4_SRGB"; + case PixelFormat::ASTC_2D_5X5_UNORM: + return "ASTC_2D_5X5_UNORM"; + case PixelFormat::ASTC_2D_5X5_SRGB: + return "ASTC_2D_5X5_SRGB"; + case PixelFormat::ASTC_2D_10X8_UNORM: + return "ASTC_2D_10X8_UNORM"; + case PixelFormat::ASTC_2D_10X8_SRGB: + return "ASTC_2D_10X8_SRGB"; + case PixelFormat::ASTC_2D_6X6_UNORM: + return "ASTC_2D_6X6_UNORM"; + case PixelFormat::ASTC_2D_6X6_SRGB: + return "ASTC_2D_6X6_SRGB"; + case PixelFormat::ASTC_2D_10X10_UNORM: + return "ASTC_2D_10X10_UNORM"; + case PixelFormat::ASTC_2D_10X10_SRGB: + return "ASTC_2D_10X10_SRGB"; + case 
PixelFormat::ASTC_2D_12X12_UNORM: + return "ASTC_2D_12X12_UNORM"; + case PixelFormat::ASTC_2D_12X12_SRGB: + return "ASTC_2D_12X12_SRGB"; + case PixelFormat::ASTC_2D_8X6_UNORM: + return "ASTC_2D_8X6_UNORM"; + case PixelFormat::ASTC_2D_8X6_SRGB: + return "ASTC_2D_8X6_SRGB"; + case PixelFormat::ASTC_2D_6X5_UNORM: + return "ASTC_2D_6X5_UNORM"; + case PixelFormat::ASTC_2D_6X5_SRGB: + return "ASTC_2D_6X5_SRGB"; + case PixelFormat::E5B9G9R9_FLOAT: + return "E5B9G9R9_FLOAT"; + case PixelFormat::D32_FLOAT: + return "D32_FLOAT"; + case PixelFormat::D16_UNORM: + return "D16_UNORM"; + case PixelFormat::D24_UNORM_S8_UINT: + return "D24_UNORM_S8_UINT"; + case PixelFormat::S8_UINT_D24_UNORM: + return "S8_UINT_D24_UNORM"; + case PixelFormat::D32_FLOAT_S8_UINT: + return "D32_FLOAT_S8_UINT"; + case PixelFormat::MaxDepthStencilFormat: + case PixelFormat::Invalid: + return "Invalid"; + } + return "Invalid"; + }(); + return formatter<string_view>::format(name, ctx); + } +}; + +template <> +struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> { + template <typename FormatContext> + auto format(VideoCommon::ImageType type, FormatContext& ctx) { + const string_view name = [type] { + using VideoCommon::ImageType; + switch (type) { + case ImageType::e1D: + return "1D"; + case ImageType::e2D: + return "2D"; + case ImageType::e3D: + return "3D"; + case ImageType::Linear: + return "Linear"; + case ImageType::Buffer: + return "Buffer"; + } + return "Invalid"; + }(); + return formatter<string_view>::format(name, ctx); + } +}; + +template <> +struct fmt::formatter<VideoCommon::Extent3D> { + constexpr auto parse(fmt::format_parse_context& ctx) { + return ctx.begin(); + } + + template <typename FormatContext> + auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, + extent.depth); + } +}; + +namespace VideoCommon { + +struct ImageBase; +struct ImageViewBase; +struct 
RenderTargets; + +[[nodiscard]] std::string Name(const ImageBase& image); + +[[nodiscard]] std::string Name(const ImageViewBase& image_view, + std::optional<ImageViewType> type = std::nullopt); + +[[nodiscard]] std::string Name(const RenderTargets& render_targets); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..448a05fcc --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp @@ -0,0 +1,216 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <optional> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/util.h" + +namespace VideoCommon { + +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; + +namespace { +/// Returns the base layer and mip level offset +[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) { + if (layer_stride == 0) { + return {0, diff}; + } else { + return {diff / layer_stride, diff % layer_stride}; + } +} + +[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { + return layers.base_level < info.resources.levels && + layers.base_layer + layers.num_layers <= info.resources.layers; +} + +[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { + const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); + const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); + if (!ValidateLayers(copy.src_subresource, src)) { + return false; + } + if (!ValidateLayers(copy.dst_subresource, dst)) 
{ + return false; + } + if (copy.src_offset.x + copy.extent.width > src_size.width || + copy.src_offset.y + copy.extent.height > src_size.height || + copy.src_offset.z + copy.extent.depth > src_size.depth) { + return false; + } + if (copy.dst_offset.x + copy.extent.width > dst_size.width || + copy.dst_offset.y + copy.extent.height > dst_size.height || + copy.dst_offset.z + copy.extent.depth > dst_size.depth) { + return false; + } + return true; +} +} // Anonymous namespace + +ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) + : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, + unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, + converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, + cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, + mip_level_offsets{CalculateMipLevelOffsets(info)} { + if (info.type == ImageType::e3D) { + slice_offsets = CalculateSliceOffsets(info); + slice_subresources = CalculateSliceSubresources(info); + } +} + +std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { + if (other_addr < gpu_addr) { + // Subresource address can't be lower than the base + return std::nullopt; + } + const u32 diff = static_cast<u32>(other_addr - gpu_addr); + if (diff > guest_size_bytes) { + // This can happen when two CPU addresses are used for different GPU addresses + return std::nullopt; + } + if (info.type != ImageType::e3D) { + const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); + const auto end = mip_level_offsets.begin() + info.resources.levels; + const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + if (layer > info.resources.layers || it == end) { + return std::nullopt; + } + return SubresourceBase{ + .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)), + .layer = layer, + }; + } else { + // TODO: Consider using binary_search after a threshold + const auto it = 
std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.cend()) { + return std::nullopt; + } + return slice_subresources[std::distance(slice_offsets.begin(), it)]; + } +} + +ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { + const auto it = std::ranges::find(image_view_infos, view_info); + if (it == image_view_infos.end()) { + return ImageViewId{}; + } + return image_view_ids[std::distance(image_view_infos.begin(), it)]; +} + +void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { + image_view_infos.push_back(view_info); + image_view_ids.push_back(image_view_id); +} + +void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { + static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; + ASSERT(lhs.info.type == rhs.info.type); + std::optional<SubresourceBase> base; + if (lhs.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS); + } + if (!base) { + LOG_ERROR(HW_GPU, "Image alias should have been flipped"); + return; + } + const PixelFormat lhs_format = lhs.info.format; + const PixelFormat rhs_format = rhs.info.format; + const Extent2D lhs_block{ + .width = DefaultBlockWidth(lhs_format), + .height = DefaultBlockHeight(lhs_format), + }; + const Extent2D rhs_block{ + .width = DefaultBlockWidth(rhs_format), + .height = DefaultBlockHeight(rhs_format), + }; + const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; + const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; + if (is_lhs_compressed && is_rhs_compressed) { + LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); + return; + } + const s32 lhs_mips = lhs.info.resources.levels; + const s32 rhs_mips = rhs.info.resources.levels; + const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); + AliasedImage lhs_alias; + 
AliasedImage rhs_alias; + lhs_alias.id = rhs_id; + rhs_alias.id = lhs_id; + lhs_alias.copies.reserve(num_mips); + rhs_alias.copies.reserve(num_mips); + for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { + Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); + Extent3D rhs_size = MipSize(rhs.info.size, mip_level); + if (is_lhs_compressed) { + lhs_size.width /= lhs_block.width; + lhs_size.height /= lhs_block.height; + } + if (is_rhs_compressed) { + rhs_size.width /= rhs_block.width; + rhs_size.height /= rhs_block.height; + } + const Extent3D copy_size{ + .width = std::min(lhs_size.width, rhs_size.width), + .height = std::min(lhs_size.height, rhs_size.height), + .depth = std::min(lhs_size.depth, rhs_size.depth), + }; + if (copy_size.width == 0 || copy_size.height == 0) { + LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); + continue; + } + const bool is_lhs_3d = lhs.info.type == ImageType::e3D; + const bool is_rhs_3d = rhs.info.type == ImageType::e3D; + const Offset3D lhs_offset{0, 0, 0}; + const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; + const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; + const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; + const s32 num_layers = std::min(lhs_layers, rhs_layers); + const SubresourceLayers lhs_subresource{ + .base_level = mip_level, + .base_layer = 0, + .num_layers = num_layers, + }; + const SubresourceLayers rhs_subresource{ + .base_level = base->level + mip_level, + .base_layer = is_rhs_3d ? 
0 : base->layer, + .num_layers = num_layers, + }; + [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = lhs_subresource, + .dst_subresource = rhs_subresource, + .src_offset = lhs_offset, + .dst_offset = rhs_offset, + .extent = copy_size, + }); + [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ + .src_subresource = rhs_subresource, + .dst_subresource = lhs_subresource, + .src_offset = rhs_offset, + .dst_offset = lhs_offset, + .extent = copy_size, + }); + ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); + ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); + } + ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); + if (lhs_alias.copies.empty()) { + return; + } + lhs.aliased_images.push_back(std::move(lhs_alias)); + rhs.aliased_images.push_back(std::move(rhs_alias)); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h @@ -0,0 +1,83 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 

#pragma once

#include <array>
#include <optional>
#include <vector>

#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon {

/// Bit flags describing the state of an image
enum class ImageFlagBits : u32 {
    AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
    Converted = 1 << 1,   ///< Guest format is not supported natively and it has to be converted
    CpuModified = 1 << 2, ///< Contents have been modified from the CPU
    GpuModified = 1 << 3, ///< Contents have been modified from the GPU
    Tracked = 1 << 4,     ///< Writes and reads are being hooked from the CPU JIT
    Strong = 1 << 5,      ///< Exists in the image table, the dimensions can be trusted
    Registered = 1 << 6,  ///< True when the image is registered
    Picked = 1 << 7,      ///< Temporary flag to mark the image as picked
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

struct ImageViewInfo;

/// Another image aliasing this one and the copies associated with it
struct AliasedImage {
    std::vector<ImageCopy> copies;
    ImageId id;
};

/// Backend agnostic state of an image
struct ImageBase {
    explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);

    /// Tries to find the subresource of this image starting at the passed GPU address
    [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;

    /// Returns the id of an inserted view with the same info, or a default id when missing
    [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;

    /// Registers a view and its id in this image
    void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);

    /// Returns true when [overlap_cpu_addr, overlap_cpu_addr + overlap_size) intersects
    /// [cpu_addr, cpu_addr_end)
    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
    }

    ImageInfo info;

    u32 guest_size_bytes = 0;
    u32 unswizzled_size_bytes = 0;
    u32 converted_size_bytes = 0;
    ImageFlagBits flags = ImageFlagBits::CpuModified; // Images start flagged as CPU modified

    GPUVAddr gpu_addr = 0;
    VAddr cpu_addr = 0;
    VAddr cpu_addr_end = 0; // cpu_addr + guest_size_bytes, set by the constructor

    u64 modification_tick = 0;
    u64 frame_tick = 0;

    std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};

    // Parallel vectors, a view info and its id share the same index
    std::vector<ImageViewInfo> image_view_infos;
    std::vector<ImageViewId> image_view_ids;

    // Parallel vectors, only filled by the constructor for 3D images
    std::vector<u32> slice_offsets;
    std::vector<SubresourceBase> slice_subresources;

    std::vector<AliasedImage> aliased_images;
};

struct ImageAllocBase {
    std::vector<ImageId> images;
};

/// Registers lhs and rhs as aliases of each other
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp @@ -0,0 +1,189 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/assert.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"

namespace VideoCommon {

using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;

/// Builds the image info from a texture image control (TIC) entry
ImageInfo::ImageInfo(const TICEntry& config) noexcept {
    format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
                                        config.a_type, config.srgb_conversion);
    num_samples = NumSamples(config.msaa_mode);
    resources.levels = config.max_mip_level + 1;
    // pitch and block share storage in a union, only one of them is written
    if (config.IsPitchLinear()) {
        pitch = config.Pitch();
    } else if (config.IsBlockLinear()) {
        block = Extent3D{
            .width = config.block_width,
            .height = config.block_height,
            .depth = config.block_depth,
        };
    }
    tile_width_spacing = config.tile_width_spacing;
    // Pitch linear is only expected on 2D textures
    if (config.texture_type != TextureType::Texture2D &&
        config.texture_type != TextureType::Texture2DNoMipmap) {
        ASSERT(!config.IsPitchLinear());
    }
    switch (config.texture_type) {
    case TextureType::Texture1D:
        ASSERT(config.BaseLayer() == 0);
        type = ImageType::e1D;
        size.width = config.Width();
        break;
    case TextureType::Texture1DArray:
        UNIMPLEMENTED_IF(config.BaseLayer() != 0);
        type = ImageType::e1D;
        size.width = config.Width();
        resources.layers = config.Depth();
        break;
    case TextureType::Texture2D:
    case TextureType::Texture2DNoMipmap:
        ASSERT(config.Depth() == 1);
        type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
        size.width = config.Width();
        size.height = config.Height();
        resources.layers = config.BaseLayer() + 1;
        break;
    case TextureType::Texture2DArray:
        type = ImageType::e2D;
        size.width = config.Width();
        size.height = config.Height();
        resources.layers = config.BaseLayer() + config.Depth();
        break;
    case TextureType::TextureCubemap:
        ASSERT(config.Depth() == 1);
        type = ImageType::e2D;
        size.width = config.Width();
        size.height = config.Height();
        // A cubemap takes six layers
        resources.layers = config.BaseLayer() + 6;
        break;
    case TextureType::TextureCubeArray:
        UNIMPLEMENTED_IF(config.load_store_hint != 0);
        type = ImageType::e2D;
        size.width = config.Width();
        size.height = config.Height();
        resources.layers = config.BaseLayer() + config.Depth() * 6;
        break;
    case TextureType::Texture3D:
        ASSERT(config.BaseLayer() == 0);
        type = ImageType::e3D;
        size.width = config.Width();
        size.height = config.Height();
        size.depth = config.Depth();
        break;
    case TextureType::Texture1DBuffer:
        type = ImageType::Buffer;
        size.width = config.Width();
        break;
    default:
        UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
        break;
    }
    if (type != ImageType::Linear) {
        // FIXME: Call this without passing *this
        layer_stride = CalculateLayerStride(*this);
        maybe_unaligned_layer_stride = CalculateLayerSize(*this);
    }
}

/// Builds the image info of the color render target at the given index
ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
    const auto& rt = regs.rt[index];
    format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
    if (rt.tile_mode.is_pitch_linear) {
        ASSERT(rt.tile_mode.is_3d == 0);
        type = ImageType::Linear;
        // The width register is read as the pitch here and the width is derived from it
        pitch = rt.width;
        size = Extent3D{
            .width = pitch / BytesPerBlock(format),
            .height = rt.height,
            .depth = 1,
        };
        return;
    }
    size.width = rt.width;
    size.height = rt.height;
    layer_stride = rt.layer_stride * 4;
    maybe_unaligned_layer_stride = layer_stride;
    num_samples = NumSamples(regs.multisample_mode);
    block = Extent3D{
        .width = rt.tile_mode.block_width,
        .height = rt.tile_mode.block_height,
        .depth = rt.tile_mode.block_depth,
    };
    // The depth register is the depth of 3D targets and the number of layers otherwise
    if (rt.tile_mode.is_3d) {
        type = ImageType::e3D;
        size.depth = rt.depth;
    } else {
        type = ImageType::e2D;
        resources.layers = rt.depth;
    }
}

/// Builds the image info of the depth (zeta) buffer
ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
    format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
    size.width = regs.zeta_width;
    size.height = regs.zeta_height;
    resources.levels = 1;
    layer_stride = regs.zeta.layer_stride * 4;
    maybe_unaligned_layer_stride = layer_stride;
    num_samples = NumSamples(regs.multisample_mode);
    block = Extent3D{
        .width = regs.zeta.tile_mode.block_width,
        .height = regs.zeta.tile_mode.block_height,
        .depth = regs.zeta.tile_mode.block_depth,
    };
    if (regs.zeta.tile_mode.is_pitch_linear) {
        ASSERT(regs.zeta.tile_mode.is_3d == 0);
        type = ImageType::Linear;
        // Writing pitch replaces the block written above, both live in the same union
        pitch = size.width * BytesPerBlock(format);
    } else if (regs.zeta.tile_mode.is_3d) {
        ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
        type = ImageType::e3D;
        size.depth = regs.zeta_depth;
    } else {
        type = ImageType::e2D;
        resources.layers = regs.zeta_depth;
    }
}

/// Builds the image info of a 2D engine (Fermi2D) surface
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
    UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
    format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
    if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
        type = ImageType::Linear;
        size = Extent3D{
            .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
            .height = config.height,
            .depth = 1,
        };
        pitch = config.pitch;
    } else {
        type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
        block = Extent3D{
            .width = config.block_width,
            .height = config.block_height,
            .depth = config.block_depth,
        };
        // 3D blits with more than one slice are not implemented for now
        // Render to individual slices
        size = Extent3D{
            .width = config.width,
            .height = config.height,
            .depth = 1,
        };
    }
}

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h @@ -0,0 +1,38 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon {

using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;

/// Properties describing an image. It can be built from a TIC entry, from the 3D engine
/// registers (color or depth targets) or from a 2D engine surface.
struct ImageInfo {
    explicit ImageInfo() = default;
    explicit ImageInfo(const TICEntry& config) noexcept;
    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
    explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
    explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;

    PixelFormat format = PixelFormat::Invalid;
    ImageType type = ImageType::e1D;
    SubresourceExtent resources;
    Extent3D size{1, 1, 1};
    // block and pitch share storage, writing one of them replaces the other
    union {
        Extent3D block{0, 0, 0};
        u32 pitch;
    };
    u32 layer_stride = 0;
    u32 maybe_unaligned_layer_stride = 0;
    u32 num_samples = 1;
    u32 tile_width_spacing = 0;
};

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..076a4bcfd --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -0,0 +1,41 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+ +#include <algorithm> + +#include "common/assert.h" +#include "core/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, + ImageId image_id_) + : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, + size{ + .width = std::max(image_info.size.width >> range.base.level, 1u), + .height = std::max(image_info.size.height >> range.base.level, 1u), + .depth = std::max(image_info.size.depth >> range.base.level, 1u), + } { + ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format), + "Image view format {} is incompatible with image format {}", info.format, + image_info.format); + const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + if (image_info.type == ImageType::Linear && is_async) { + flags |= ImageViewFlagBits::PreemtiveDownload; + } + if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { + flags |= ImageViewFlagBits::Slice; + } +} + +ImageViewBase::ImageViewBase(const NullImageParams&) {} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 

#pragma once

#include "common/common_funcs.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon {

using VideoCore::Surface::PixelFormat;

struct ImageViewInfo;
struct ImageInfo;

/// Tag type selecting the constructor that builds a null image view
struct NullImageParams {};

/// Bit flags describing an image view
enum class ImageViewFlagBits : u16 {
    PreemtiveDownload = 1 << 0, // Set on views of linear images with asynchronous GPU emulation
    Strong = 1 << 1,
    Slice = 1 << 2, // Set on non-3D views of 3D images
};
DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)

/// Backend agnostic state of an image view
struct ImageViewBase {
    explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
                           ImageId image_id);
    explicit ImageViewBase(const NullImageParams&);

    /// Returns true when the view type is Buffer
    [[nodiscard]] bool IsBuffer() const noexcept {
        return type == ImageViewType::Buffer;
    }

    // The constructor initializes these members in this order, size is computed from range
    ImageId image_id{};
    PixelFormat format{};
    ImageViewType type{};
    SubresourceRange range;
    Extent3D size{0, 0, 0};
    ImageViewFlagBits flags{};

    u64 invalidation_tick = 0;
    u64 modification_tick = 0;
};

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -0,0 +1,88 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+ +#include <limits> + +#include "common/assert.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +namespace { + +constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); + +[[nodiscard]] u8 CastSwizzle(SwizzleSource source) { + const u8 casted = static_cast<u8>(source); + ASSERT(static_cast<SwizzleSource>(casted) == source); + return casted; +} + +} // Anonymous namespace + +ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept + : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, + y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, + w_source{CastSwizzle(config.w_source)} { + range.base = SubresourceBase{ + .level = static_cast<s32>(config.res_min_mip_level), + .layer = base_layer, + }; + range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; + + switch (config.texture_type) { + case TextureType::Texture1D: + ASSERT(config.Height() == 1); + ASSERT(config.Depth() == 1); + type = ImageViewType::e1D; + break; + case TextureType::Texture2D: + case TextureType::Texture2DNoMipmap: + ASSERT(config.Depth() == 1); + type = config.normalized_coords ? 
ImageViewType::e2D : ImageViewType::Rect; + break; + case TextureType::Texture3D: + type = ImageViewType::e3D; + break; + case TextureType::TextureCubemap: + ASSERT(config.Depth() == 1); + type = ImageViewType::Cube; + range.extent.layers = 6; + break; + case TextureType::Texture1DArray: + type = ImageViewType::e1DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture2DArray: + type = ImageViewType::e2DArray; + range.extent.layers = config.Depth(); + break; + case TextureType::Texture1DBuffer: + type = ImageViewType::Buffer; + break; + case TextureType::TextureCubeArray: + type = ImageViewType::CubeArray; + range.extent.layers = config.Depth() * 6; + break; + default: + UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); + break; + } +} + +ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, + SubresourceRange range_) noexcept + : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, + y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, + w_source{RENDER_TARGET_SWIZZLE} {} + +bool ImageViewInfo::IsRenderTarget() const noexcept { + return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && + z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+
+#include "video_core/surface.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TICEntry;
+using VideoCore::Surface::PixelFormat;
+
+/// Properties used to determine an image view
+struct ImageViewInfo {
+    /// Leaves every member at its in-class default (identity R, G, B, A swizzle).
+    explicit ImageViewInfo() noexcept = default;
+    /// Builds the description from a TICEntry, using base_layer as the first layer.
+    explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
+    /// Builds the description from explicit type, format and subresource range.
+    explicit ImageViewInfo(ImageViewType type, PixelFormat format,
+                           SubresourceRange range = {}) noexcept;
+
+    /// Member-wise comparison in declaration order.
+    auto operator<=>(const ImageViewInfo&) const noexcept = default;
+
+    [[nodiscard]] bool IsRenderTarget() const noexcept;
+
+    /// Returns the four stored swizzle slots, in x, y, z, w order, as SwizzleSource values.
+    [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
+        return std::array{
+            static_cast<SwizzleSource>(x_source),
+            static_cast<SwizzleSource>(y_source),
+            static_cast<SwizzleSource>(z_source),
+            static_cast<SwizzleSource>(w_source),
+        };
+    }
+
+    ImageViewType type{};
+    PixelFormat format{};
+    SubresourceRange range;
+    // Swizzle sources are stored as raw u8 rather than as SwizzleSource.
+    u8 x_source = static_cast<u8>(SwizzleSource::R);
+    u8 y_source = static_cast<u8>(SwizzleSource::G);
+    u8 z_source = static_cast<u8>(SwizzleSource::B);
+    u8 w_source = static_cast<u8>(SwizzleSource::A);
+};
+// Fails to compile if the struct gains padding bytes.
+// NOTE(review): presumably required so the struct can be hashed or compared bytewise - confirm.
+static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#pragma once + +#include <algorithm> +#include <span> +#include <utility> + +#include "common/bit_cast.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +/// Framebuffer properties used to lookup a framebuffer +struct RenderTargets { + constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + + constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { + const auto contains = [elements](ImageViewId item) { + return std::ranges::find(elements, item) != elements.end(); + }; + return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); + } + + std::array<ImageViewId, NUM_RT> color_buffer_ids; + ImageViewId depth_buffer_id; + std::array<u8, NUM_RT> draw_buffers{}; + Extent2D size; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::RenderTargets> { + size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { + using VideoCommon::ImageViewId; + size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id); + for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { + value ^= std::hash<ImageViewId>{}(color_buffer_id); + } + value ^= Common::BitCast<u64>(rt.draw_buffers); + value ^= Common::BitCast<u64>(rt.size); + return value; + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h @@ -0,0 +1,55 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include <utility>
+
+#include "common/assert.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+/// Splits a sample count into two base-2 exponents whose powers multiply back to the count
+/// (for example 8 -> {2, 1}, since 4 * 2 == 8).
+/// NOTE(review): the pair is presumably {log2 of horizontal samples, log2 of vertical samples},
+/// in line with MsaaMode names such as Msaa4x2 - confirm against callers.
+/// Counts other than 1, 2, 4, 8 and 16 hit UNREACHABLE_MSG and fall back to {1, 1}.
+[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
+    switch (num_samples) {
+    case 1:
+        return {0, 0};
+    case 2:
+        return {1, 0};
+    case 4:
+        return {1, 1};
+    case 8:
+        return {2, 1};
+    case 16:
+        return {2, 2};
+    }
+    UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
+    return {1, 1};
+}
+
+/// Returns the total number of samples for an MSAA mode (1, 2, 4, 8 or 16).
+/// A value matching none of the listed enumerators hits UNREACHABLE_MSG and falls back to 1.
+[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
+    using Tegra::Texture::MsaaMode;
+    // No default label: the fallback for unmatched values sits after the switch.
+    switch (msaa_mode) {
+    case MsaaMode::Msaa1x1:
+        return 1;
+    case MsaaMode::Msaa2x1:
+    case MsaaMode::Msaa2x1_D3D:
+        return 2;
+    case MsaaMode::Msaa2x2:
+    case MsaaMode::Msaa2x2_VC4:
+    case MsaaMode::Msaa2x2_VC12:
+        return 4;
+    case MsaaMode::Msaa4x2:
+    case MsaaMode::Msaa4x2_D3D:
+    case MsaaMode::Msaa4x2_VC8:
+    case MsaaMode::Msaa4x2_VC24:
+        return 8;
+    case MsaaMode::Msaa4x4:
+        return 16;
+    }
+    UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+    return 1;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#pragma once + +#include <array> +#include <concepts> +#include <numeric> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" + +namespace VideoCommon { + +struct SlotId { + static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max(); + + constexpr auto operator<=>(const SlotId&) const noexcept = default; + + constexpr explicit operator bool() const noexcept { + return index != INVALID_INDEX; + } + + u32 index = INVALID_INDEX; +}; + +template <class T> +requires std::is_nothrow_move_assignable_v<T>&& + std::is_nothrow_move_constructible_v<T> class SlotVector { +public: + ~SlotVector() noexcept { + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + if ((bits & 1) != 0) { + values[index + bit].object.~T(); + } + } + index += 64; + } + delete[] values; + } + + [[nodiscard]] T& operator[](SlotId id) noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + [[nodiscard]] const T& operator[](SlotId id) const noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + template <typename... Args> + [[nodiscard]] SlotId insert(Args&&... 
args) noexcept { + const u32 index = FreeValueIndex(); + new (&values[index].object) T(std::forward<Args>(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + + void erase(SlotId id) noexcept { + values[id.index].object.~T(); + free_list.push_back(id.index); + ResetStorageBit(id.index); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Entry { + Entry() noexcept : dummy{} {} + ~Entry() noexcept {} + + NonTrivialDummy dummy; + T object; + }; + + void SetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] |= u64(1) << (index % 64); + } + + void ResetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); + } + + bool ReadStorageBit(u32 index) noexcept { + return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; + } + + void ValidateIndex(SlotId id) const noexcept { + DEBUG_ASSERT(id); + DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + } + + [[nodiscard]] u32 FreeValueIndex() noexcept { + if (free_list.empty()) { + Reserve(values_capacity ? 
(values_capacity << 1) : 1); + } + const u32 free_index = free_list.back(); + free_list.pop_back(); + return free_index; + } + + void Reserve(size_t new_capacity) noexcept { + Entry* const new_values = new Entry[new_capacity]; + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + const size_t i = index + bit; + if ((bits & 1) == 0) { + continue; + } + T& old_value = values[i].object; + new (&new_values[i].object) T(std::move(old_value)); + old_value.~T(); + } + index += 64; + } + + stored_bitset.resize((new_capacity + 63) / 64); + + const size_t old_free_size = free_list.size(); + free_list.resize(old_free_size + (new_capacity - values_capacity)); + std::iota(free_list.begin() + old_free_size, free_list.end(), + static_cast<u32>(values_capacity)); + + delete[] values; + values = new_values; + values_capacity = new_capacity; + } + + Entry* values = nullptr; + size_t values_capacity = 0; + size_t values_size = 0; + + std::vector<u64> stored_bitset; + std::vector<u32> free_list; +}; + +} // namespace VideoCommon + +template <> +struct std::hash<VideoCommon::SlotId> { + size_t operator()(const VideoCommon::SlotId& id) const noexcept { + return std::hash<u32>{}(id.index); + } +}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index b44c09d71..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/algorithm.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/memory_manager.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/convert.h" - -namespace VideoCommon { - -MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); - -using Tegra::Texture::ConvertFromGuestToHost; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::IsPixelFormatASTC; -using VideoCore::Surface::PixelFormat; - -StagingCache::StagingCache() = default; - -StagingCache::~StagingCache() = default; - -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels), - mipmap_offsets(params.num_levels) { - is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported; - host_memory_size = params.GetHostSizeInBytes(is_converted); - - std::size_t offset = 0; - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes[level] = mipmap_size; - mipmap_offsets[level] = offset; - offset += mipmap_size; - } - layer_size = offset; - if (params.is_layered) { - if (params.is_tiled) { - layer_size = - SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); - } - guest_memory_size = layer_size * params.depth; - } else { - guest_memory_size = layer_size; - } -} - -MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, 
rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; -} - -MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } else { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - // Tiled Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, - rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; -} - -std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( - const GPUVAddr candidate_gpu_addr) const { - if (gpu_addr == candidate_gpu_addr) { - return {{0, 0}}; - } - - if (candidate_gpu_addr < gpu_addr) { 
- return std::nullopt; - } - - const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast<u32>(relative_address / layer_size)}; - if (layer >= params.depth) { - return std::nullopt; - } - - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return std::nullopt; - } - - const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); -} - -std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector<CopyParams> result; - result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); - - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); - } - } - return result; -} - -std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector<CopyParams> result; - result.reserve(mipmaps); - - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; -} - -void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, - 
u8* buffer, u32 level) { - const u32 width{params.GetMipWidth(level)}; - const u32 height{params.GetMipHeight(level)}; - const u32 block_height{params.GetMipBlockHeight(level)}; - const u32 block_depth{params.GetMipBlockDepth(level)}; - - std::size_t guest_offset{mipmap_offsets[level]}; - if (params.is_layered) { - std::size_t host_offset = 0; - const std::size_t guest_stride = layer_size; - const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.depth; ++layer) { - MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, - params.tile_width_spacing, buffer + host_offset, memory + guest_offset); - guest_offset += guest_stride; - host_offset += host_stride; - } - } else { - MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, - params.GetMipDepth(level), params.tile_width_spacing, buffer, - memory + guest_offset); - } -} - -void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Load_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", - params.block_width, static_cast<u32>(params.target)); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else { - ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{params.GetBytesPerPixel()}; - const u32 
block_width{params.GetDefaultBlockWidth()}; - const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.width + block_width - 1) / block_width}; - const u32 height{(params.height + block_height - 1) / block_height}; - const u32 copy_size{width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); - } else { - const u8* start{host_ptr}; - u8* write_to{staging_buffer.data()}; - for (u32 h = height; h > 0; --h) { - std::memcpy(write_to, start, copy_size); - start += params.pitch; - write_to += copy_size; - } - } - } - - if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { - return; - } - - for (u32 level = params.num_levels; level--;) { - const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; - const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; - u8* const in_buffer = staging_buffer.data() + in_host_offset; - u8* const out_buffer = staging_buffer.data() + out_host_offset; - ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, - params.GetMipWidth(level), params.GetMipHeight(level), - params.GetMipDepth(level), true, true); - } -} - -void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - StagingCache& staging_cache) { - MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto& staging_buffer = staging_cache.GetBuffer(0); - u8* host_ptr; - - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - - if (params.target == SurfaceTarget::Texture3D) { - // Special case for 3D texture segments - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } - - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t 
host_offset{params.GetHostMipmapLevelOffset(level, false)}; - SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, - staging_buffer.data() + host_offset, level); - } - } else if (params.IsBuffer()) { - // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest - // memory. - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - ASSERT(params.target == SurfaceTarget::Texture2D); - ASSERT(params.num_levels == 1); - - const u32 bpp{params.GetBytesPerPixel()}; - const u32 copy_size{params.width * bpp}; - if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); - } else { - u8* start{host_ptr}; - const u8* read_to{staging_buffer.data()}; - for (u32 h = params.height; h > 0; --h) { - std::memcpy(start, read_to, copy_size); - start += params.pitch; - read_to += copy_size; - } - } - } - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index 173f2edba..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <optional> -#include <tuple> -#include <unordered_map> -#include <vector> - -#include "common/common_types.h" -#include "video_core/gpu.h" -#include "video_core/morton.h" -#include "video_core/texture_cache/copy_params.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon { - -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceTarget; - -enum class MatchStructureResult : u32 { - FullMatch = 0, - SemiMatch = 1, - None = 2, -}; - -enum class MatchTopologyResult : u32 { - FullMatch = 0, - CompressUnmatch = 1, - None = 2, -}; - -class StagingCache { -public: - explicit StagingCache(); - ~StagingCache(); - - std::vector<u8>& GetBuffer(std::size_t index) { - return staging_buffer[index]; - } - - const std::vector<u8>& GetBuffer(std::size_t index) const { - return staging_buffer[index]; - } - - void SetSize(std::size_t size) { - staging_buffer.resize(size); - } - -private: - std::vector<std::vector<u8>> staging_buffer; -}; - -class SurfaceBaseImpl { -public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - bool Overlaps(const VAddr start, const VAddr end) const { - return (cpu_addr < end) && (cpu_addr_end > start); - } - - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { - const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return gpu_addr <= other_start && other_end <= gpu_addr_end; - } - - // Use only when recycling a surface - void SetGpuAddr(const GPUVAddr new_addr) { - gpu_addr = new_addr; - } - - VAddr GetCpuAddr() const { - return cpu_addr; - } - - VAddr GetCpuAddrEnd() const { - return cpu_addr_end; - } - - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - 
cpu_addr_end = new_addr + guest_memory_size; - } - - const SurfaceParams& GetSurfaceParams() const { - return params; - } - - std::size_t GetSizeInBytes() const { - return guest_memory_size; - } - - std::size_t GetHostSizeInBytes() const { - return host_memory_size; - } - - std::size_t GetMipmapSize(const u32 level) const { - return mipmap_sizes[level]; - } - - bool IsLinear() const { - return !params.is_tiled; - } - - bool IsConverted() const { - return is_converted; - } - - bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { - return params.pixel_format == pixel_format; - } - - VideoCore::Surface::PixelFormat GetFormat() const { - return params.pixel_format; - } - - bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { - return params.target == target; - } - - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; - - bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { - return std::tie(gpu_addr, params.target, params.num_levels) == - std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && - params.target == SurfaceTarget::Texture2D && params.num_levels == 1; - } - - std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; - - std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { - return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); - } - -protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported); - ~SurfaceBaseImpl() = default; - - virtual void DecorateSurfaceName() = 0; - - const SurfaceParams params; - std::size_t layer_size; - std::size_t guest_memory_size; - std::size_t host_memory_size; - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - VAddr cpu_addr_end{}; - bool is_converted{}; - - std::vector<std::size_t> mipmap_sizes; - std::vector<std::size_t> mipmap_offsets; - -private: - void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level); - - std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; - - std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; -}; - -template <typename TView> -class SurfaceBase : public SurfaceBaseImpl { -public: - virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; - - virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; - - void MarkAsModified(bool is_modified_, u64 tick) { - is_modified = is_modified_ || is_target; - modification_tick = tick; - } - - void MarkAsRenderTarget(bool is_target_, u32 index_) { - is_target = is_target_; - index = index_; - } - - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; - } - - bool IsMemoryMarked() const { - return is_memory_marked; - } - - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } - - bool IsSyncPending() const { - return is_sync_pending; - } - - void MarkAsPicked(bool is_picked_) { - is_picked = is_picked_; - } - - bool IsModified() const { - return is_modified; - } - - bool IsProtected() const { - // Only 3D slices are to be protected - return is_target && params.target == SurfaceTarget::Texture3D; - } - - bool IsRenderTarget() const { - return is_target; - } - - u32 GetRenderTarget() const { - 
return index; - } - - bool IsRegistered() const { - return is_registered; - } - - bool IsPicked() const { - return is_picked; - } - - void MarkAsRegistered(bool is_reg) { - is_registered = is_reg; - } - - u64 GetModificationTick() const { - return modification_tick; - } - - TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; - return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); - } - - TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { - return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, - base_level, num_levels)); - } - - std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, - const GPUVAddr view_addr, - const std::size_t candidate_size, const u32 mipmap, - const u32 layer) { - const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; - if (!layer_mipmap) { - return {}; - } - const auto [end_layer, end_mipmap] = *layer_mipmap; - if (layer != end_layer) { - if (mipmap == 0 && end_mipmap == 0) { - return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1)); - } - return {}; - } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap)); - } - } - - std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, - const std::size_t candidate_size) { - if (params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D || - (params.num_levels == 1 && !params.is_layered)) { - return {}; - } - const auto layer_mipmap{GetLayerMipmap(view_addr)}; - if (!layer_mipmap) { - return {}; - } - const auto [layer, mipmap] = *layer_mipmap; - if (GetMipmapSize(mipmap) != candidate_size) { - return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); - } - return GetView(ViewParams(view_params.target, layer, 1, 
mipmap, 1)); - } - - TView GetMainView() const { - return main_view; - } - -protected: - explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool is_astc_supported) - : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {} - - ~SurfaceBase() = default; - - virtual TView CreateView(const ViewParams& view_key) = 0; - - TView main_view; - std::unordered_map<ViewParams, TView> views; - -private: - TView GetView(const ViewParams& key) { - const auto [entry, is_cache_miss] = views.try_emplace(key); - auto& view{entry->second}; - if (is_cache_miss) { - view = CreateView(key); - } - return view; - } - - static constexpr u32 NO_RT = 0xFFFFFFFF; - - bool is_modified{}; - bool is_target{}; - bool is_registered{}; - bool is_picked{}; - bool is_memory_marked{}; - bool is_sync_pending{}; - u32 index{NO_RT}; - u64 modification_tick{}; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index 13dd16356..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <string> -#include <tuple> - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "core/core.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceTargetFromTextureType; -using VideoCore::Surface::SurfaceType; - -namespace { - -SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { - switch (type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return SurfaceTarget::Texture3D; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? 
SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { - switch (type) { - case Tegra::Shader::ImageType::Texture1D: - return SurfaceTarget::Texture1D; - case Tegra::Shader::ImageType::TextureBuffer: - return SurfaceTarget::TextureBuffer; - case Tegra::Shader::ImageType::Texture1DArray: - return SurfaceTarget::Texture1DArray; - case Tegra::Shader::ImageType::Texture2D: - return SurfaceTarget::Texture2D; - case Tegra::Shader::ImageType::Texture2DArray: - return SurfaceTarget::Texture2DArray; - case Tegra::Shader::ImageType::Texture3D: - return SurfaceTarget::Texture3D; - default: - UNREACHABLE(); - return SurfaceTarget::Texture2D; - } -} - -constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { - return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); -} - -} // Anonymous namespace - -SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { - switch (params.pixel_format) { - case PixelFormat::R16_UNORM: - case PixelFormat::R16_FLOAT: - params.pixel_format = PixelFormat::D16_UNORM; - break; - case PixelFormat::R32_FLOAT: - params.pixel_format = PixelFormat::D32_FLOAT; - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", - static_cast<u32>(params.pixel_format)); - } - params.type = GetFormatType(params.pixel_format); - } - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - SurfaceParams params; - params.is_tiled = tic.IsTiled(); - params.srgb_conversion = tic.IsSrgbConversionEnabled(); - params.block_width = params.is_tiled ? 
tic.BlockWidth() : 0; - params.block_height = params.is_tiled ? tic.BlockHeight() : 0; - params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; - params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = lookup_table.GetPixelFormat( - tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); - params.type = GetFormatType(params.pixel_format); - params.target = ImageTypeToSurfaceTarget(entry.type); - // TODO: on 1DBuffer we should use the tic info. - if (tic.IsBuffer()) { - params.target = SurfaceTarget::TextureBuffer; - params.width = tic.Width(); - params.pitch = params.width * params.GetBytesPerPixel(); - params.height = 1; - params.depth = 1; - params.num_levels = 1; - params.emulated_levels = 1; - params.is_layered = false; - } else { - params.width = tic.Width(); - params.height = tic.Height(); - params.depth = tic.Depth(); - params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.num_levels = tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); - } - return params; -} - -SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { - const auto& regs = maxwell3d.regs; - const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); - const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; - const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - return { - .is_tiled = regs.zeta.memory_layout.type == - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, - .srgb_conversion = false, - .is_layered = is_layered, - .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), - .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 
5U), - .block_depth = block_depth, - .tile_width_spacing = 1, - .width = regs.zeta_width, - .height = regs.zeta_height, - .depth = is_layered ? regs.zeta_layers.Value() : 1U, - .pitch = 0, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, - }; -} - -SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index) { - const auto& config{maxwell3d.regs.rt[index]}; - SurfaceParams params; - params.is_tiled = - config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; - params.block_width = config.memory_layout.block_width; - params.block_height = config.memory_layout.block_height; - params.block_depth = config.memory_layout.block_depth; - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.type = GetFormatType(params.pixel_format); - if (params.is_tiled) { - params.pitch = 0; - params.width = config.width; - } else { - const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; - params.pitch = config.width; - params.width = params.pitch / bpp; - } - params.height = config.height; - params.num_levels = 1; - params.emulated_levels = 1; - - if (config.memory_layout.is_3d != 0) { - params.depth = config.layers.Value(); - params.is_layered = false; - params.target = SurfaceTarget::Texture3D; - } else if (config.layers > 1) { - params.depth = config.layers.Value(); - params.is_layered = true; - params.target = SurfaceTarget::Texture2DArray; - } else { - params.depth = 1; - params.is_layered = false; - params.target = SurfaceTarget::Texture2D; - } - return params; -} - -SurfaceParams SurfaceParams::CreateForFermiCopySurface( - const 
Tegra::Engines::Fermi2D::Regs::Surface& config) { - const bool is_tiled = !config.linear; - const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); - - SurfaceParams params{ - .is_tiled = is_tiled, - .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || - config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, - .is_layered = false, - .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, - .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, - .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, - .tile_width_spacing = 1, - .width = config.width, - .height = config.height, - .depth = 1, - .pitch = config.pitch, - .num_levels = 1, - .emulated_levels = 1, - .pixel_format = pixel_format, - .type = GetFormatType(pixel_format), - // TODO(Rodrigo): Try to guess texture arrays from parameters - .target = SurfaceTarget::Texture2D, - }; - - params.is_layered = params.IsLayered(); - return params; -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Sampler& entry) { - return TextureTypeToSurfaceTarget(entry.type, entry.is_array); -} - -VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( - const VideoCommon::Shader::Image& entry) { - return ImageTypeToSurfaceTarget(entry.type); -} - -bool SurfaceParams::IsLayered() const { - switch (target) { - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } -} - -// Auto block resizing algorithm from: -// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c -u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - if (level == 0) { - return this->block_height; - } - - const u32 height_new{GetMipHeight(level)}; - const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height_new + 
default_block_height - 1) / default_block_height}; - const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height_new, 3U, 7U) - 3U; -} - -u32 SurfaceParams::GetMipBlockDepth(u32 level) const { - if (level == 0) { - return this->block_depth; - } - if (is_layered) { - return 0; - } - - const u32 depth_new{GetMipDepth(level)}; - const u32 block_depth_new = Common::Log2Ceil32(depth_new); - if (block_depth_new > 4) { - return 5 - (GetMipBlockHeight(level) >= 2); - } - return block_depth_new; -} - -std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { - std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, false, false); - } - return offset; -} - -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { - std::size_t offset = 0; - if (is_converted) { - for (u32 i = 0; i < level; ++i) { - offset += GetConvertedMipmapSize(i) * GetNumLayers(); - } - } else { - for (u32 i = 0; i < level; ++i) { - offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); - } - } - return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgba8_bpp = 4ULL; - const std::size_t mip_width = GetMipWidth(level); - const std::size_t mip_height = GetMipHeight(level); - const std::size_t mip_depth = is_layered ? 
1 : GetMipDepth(level); - return mip_width * mip_height * mip_depth * rgba8_bpp; -} - -std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { - std::size_t size = 0; - for (u32 level = 0; level < num_levels; ++level) { - size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); - } - if (is_tiled && is_layered) { - return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - return size; -} - -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, - bool uncompressed) const { - const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; - const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{is_layered ? 1U : GetMipDepth(level)}; - if (is_tiled) { - return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, - depth, GetMipBlockHeight(level), - GetMipBlockDepth(level)); - } else if (as_host_size || IsBuffer()) { - return GetBytesPerPixel() * width * height * depth; - } else { - // Linear Texture Case - return pitch * height * depth; - } -} - -bool SurfaceParams::operator==(const SurfaceParams& rhs) const { - return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, num_levels, pixel_format, type, target) == - std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); -} - -std::string SurfaceParams::TargetName() const { - switch (target) { - case SurfaceTarget::Texture1D: - return "1D"; - case SurfaceTarget::TextureBuffer: - return "TexBuffer"; - case SurfaceTarget::Texture2D: - return "2D"; - case SurfaceTarget::Texture3D: - return "3D"; - case SurfaceTarget::Texture1DArray: - return "1DArray"; - case SurfaceTarget::Texture2DArray: - return 
"2DArray"; - case SurfaceTarget::TextureCubemap: - return "Cube"; - case SurfaceTarget::TextureCubeArray: - return "CubeArray"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); - UNREACHABLE(); - return fmt::format("TUK({})", static_cast<u32>(target)); - } -} - -u32 SurfaceParams::GetBlockSize() const { - const u32 x = 64U << block_width; - const u32 y = 8U << block_height; - const u32 z = 1U << block_depth; - return x * y * z; -} - -std::pair<u32, u32> SurfaceParams::GetBlockXY() const { - const u32 x_pixels = 64U / GetBytesPerPixel(); - const u32 x = x_pixels << block_width; - const u32 y = 8U << block_height; - return {x, y}; -} - -std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { - const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 block_size = GetBlockSize(); - const u32 block_index = offset / block_size; - const u32 gob_offset = offset % block_size; - const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); - const u32 x_gob_pixels = 64U / GetBytesPerPixel(); - const u32 x_block_pixels = x_gob_pixels << block_width; - const u32 y_block_pixels = 8U << block_height; - const u32 z_block_pixels = 1U << block_depth; - const u32 x_blocks = div_ceil(width, x_block_pixels); - const u32 y_blocks = div_ceil(height, y_block_pixels); - const u32 z_blocks = div_ceil(depth, z_block_pixels); - const u32 base_x = block_index % x_blocks; - const u32 base_y = (block_index / x_blocks) % y_blocks; - const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; - u32 x = base_x * x_block_pixels; - u32 y = base_y * y_block_pixels; - u32 z = base_z * z_block_pixels; - z += gob_index >> block_height; - y += (gob_index * 8U) % y_block_pixels; - return {x, y, z}; -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 
4466c3c34..000000000 --- a/src/video_core/texture_cache/surface_params.h +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <utility> - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/cityhash.h" -#include "common/common_types.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" - -namespace VideoCommon { - -class FormatLookupTable; - -class SurfaceParams { -public: - /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry); - - /// Creates SurfaceCachedParams from an image configuration. - static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, - const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry); - - /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); - - /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, - std::size_t index); - - /// Creates SurfaceCachedParams from a Fermi2D surface configuration. - static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - /// Obtains the texture target from a shader's sampler entry. - static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Sampler& entry); - - /// Obtains the texture target from a shader's sampler entry. 
- static VideoCore::Surface::SurfaceTarget ExpectedTarget( - const VideoCommon::Shader::Image& entry); - - std::size_t Hash() const { - return static_cast<std::size_t>( - Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); - } - - bool operator==(const SurfaceParams& rhs) const; - - bool operator!=(const SurfaceParams& rhs) const { - return !operator==(rhs); - } - - std::size_t GetGuestSizeInBytes() const { - return GetInnerMemorySize(false, false, false); - } - - std::size_t GetHostSizeInBytes(bool is_converted) const { - if (!is_converted) { - return GetInnerMemorySize(true, false, false); - } - // ASTC is uncompressed in software, in emulated as RGBA8 - std::size_t host_size_in_bytes = 0; - for (u32 level = 0; level < num_levels; ++level) { - host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); - } - return host_size_in_bytes; - } - - u32 GetBlockAlignedWidth() const { - return Common::AlignUp(width, 64 / GetBytesPerPixel()); - } - - /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const { - return std::max(1U, width >> level); - } - - /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const { - return std::max(1U, height >> level); - } - - /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); - } - - /// Returns the block height of a given mipmap level. - u32 GetMipBlockHeight(u32 level) const; - - /// Returns the block depth of a given mipmap level. - u32 GetMipBlockDepth(u32 level) const; - - /// Returns the best possible row/pitch alignment for the surface. - u32 GetRowAlignment(u32 level, bool is_converted) const { - const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); - return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); - } - - /// Returns the offset in bytes in guest memory of a given mipmap level. 
- std::size_t GetGuestMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; - - /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); - } - - /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); - } - - std::size_t GetConvertedMipmapSize(u32 level) const; - - /// Get this texture Tegra Block size in guest memory layout - u32 GetBlockSize() const; - - /// Get X, Y coordinates max sizes of a single block. - std::pair<u32, u32> GetBlockXY() const; - - /// Get the offset in x, y, z coordinates from a memory offset - std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; - - /// Returns the size of a layer in bytes in guest memory. - std::size_t GetGuestLayerSize() const { - return GetLayerSize(false, false); - } - - /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const { - ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); - } - - /// Returns the max possible mipmap that the texture can have in host gpu - u32 MaxPossibleMipmap() const { - const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; - const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; - const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); - if (target != VideoCore::Surface::SurfaceTarget::Texture3D) - return max_mipmap; - return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); - } - - /// Returns if the guest surface is a compressed surface. 
- bool IsCompressed() const { - return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; - } - - /// Returns the default block width. - u32 GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); - } - - /// Returns the default block height. - u32 GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); - } - - /// Returns the bits per pixel. - u32 GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); - } - - /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); - } - - /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; - } - - /// Returns is the surface is a TextureBuffer type of surface. - bool IsBuffer() const { - return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; - } - - /// Returns the number of layers in the surface. - std::size_t GetNumLayers() const { - return is_layered ? depth : 1; - } - - /// Returns the debug name of the texture for use in graphic debuggers. - std::string TargetName() const; - - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); - } - - /// Converts a width from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. 
- static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - /// Converts a height from a type of surface into another. This helps represent the - /// equivalent value between compressed/non-compressed textures. - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // Finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); - const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // Finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - - bool is_tiled; - bool srgb_conversion; - bool 
is_layered; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 num_levels; - u32 emulated_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; - -private: - /// Returns the size of a given mipmap level inside a layer. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; - - /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * - (layer_only ? 1U : (is_layered ? depth : 1U)); - } - - /// Returns the size of a layer - std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::SurfaceParams> { - std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <tuple> - -#include "common/common_types.h" -#include "video_core/texture_cache/surface_view.h" - -namespace VideoCommon { - -std::size_t ViewParams::Hash() const { - return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ - (static_cast<std::size_t>(base_level) << 24) ^ - (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); -} - -bool ViewParams::operator==(const ViewParams& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels, target) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); -} - -bool ViewParams::operator!=(const ViewParams& rhs) const { - return !operator==(rhs); -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 90a8bb0ae..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <functional> - -#include "common/common_types.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/surface_params.h" - -namespace VideoCommon { - -struct ViewParams { - constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, - u32 num_layers, u32 base_level, u32 num_levels) - : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, - num_levels{num_levels} {} - - std::size_t Hash() const; - - bool operator==(const ViewParams& rhs) const; - bool operator!=(const ViewParams& rhs) const; - - bool IsLayered() const { - switch (target) { - case VideoCore::Surface::SurfaceTarget::Texture1DArray: - case VideoCore::Surface::SurfaceTarget::Texture2DArray: - case VideoCore::Surface::SurfaceTarget::TextureCubemap: - case VideoCore::Surface::SurfaceTarget::TextureCubeArray: - return true; - default: - return false; - } - } - - VideoCore::Surface::SurfaceTarget target{}; - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; -}; - -class ViewBase { -public: - constexpr explicit ViewBase(const ViewParams& params) : params{params} {} - - constexpr const ViewParams& GetViewParams() const { - return params; - } - -protected: - ViewParams params; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash<VideoCommon::ViewParams> { - std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ea835c59f..968059842 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,1299 +6,1449 @@ #include <algorithm> #include <array> -#include <list> +#include <bit> #include <memory> #include <mutex> -#include <set> -#include <tuple> +#include <optional> +#include <span> +#include <type_traits> #include 
<unordered_map> +#include <utility> #include <vector> #include <boost/container/small_vector.hpp> -#include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> -#include "common/assert.h" +#include "common/alignment.h" +#include "common/common_funcs.h" #include "common/common_types.h" -#include "common/math_util.h" -#include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" +#include "common/logging/log.h" #include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" -#include "video_core/texture_cache/copy_params.h" +#include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/surface_base.h" -#include "video_core/texture_cache/surface_params.h" -#include "video_core/texture_cache/surface_view.h" - -namespace Tegra::Texture { -struct FullTextureInfo; -} - -namespace VideoCore { -class RasterizerInterface; -} +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" namespace VideoCommon { -using VideoCore::Surface::FormatCompatibility; +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using 
Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; -template <typename TSurface, typename TView> +template <class P> class TextureCache { - using VectorSurface = boost::container::small_vector<TSurface, 1>; + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_SHIFT = 20; + + /// Enables debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template <typename T> + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast<size_t>(value); + } + }; public: - void InvalidateRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + 
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - Unregister(surface); - } - } + /// Notify the cache that a new frame has been queued + void TickFrame(); - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Return an unique mutually exclusive lock for the cache + [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (surface->IsMemoryMarked()) { - UnmarkMemory(surface); - surface->SetSyncPending(true); - marked_for_unregister.emplace_back(surface); - } - } - } + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - void SyncGuestHost() { - std::lock_guard lock{mutex}; + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - for (const auto& surface : marked_for_unregister) { - if (surface->IsRegistered()) { - surface->SetSyncPending(false); - Unregister(surface); - } - } - marked_for_unregister.clear(); - } + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids); - /** - * Guarantees that rendertargets don't unregister themselves if the - * collide. Protection is currently only done on 3D slices. 
- */ - void GuardRenderTargets(bool new_guard) { - guard_render_targets = new_guard; - } + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); - void GuardSamplers(bool new_guard) { - guard_samplers = new_guard; - } + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); - void FlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); - auto surfaces = GetSurfacesInRegion(addr, size); - if (surfaces.empty()) { - return; - } - std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (const auto& surface : surfaces) { - mutex.unlock(); - FlushSurface(surface); - mutex.lock(); - } - } + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); - bool MustFlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); - const auto surfaces = GetSurfacesInRegion(addr, size); - return std::any_of(surfaces.cbegin(), surfaces.cend(), - [](const TSurface& surface) { return surface->IsModified(); }); - } + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); - TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Sampler& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Find a framebuffer with the currently bound 
render targets + /// UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); - if (!IsTypeCompatible(tic.texture_type, entry)) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); - const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); - TView GetImageSurface(const Tegra::Texture::TICEntry& tic, - const VideoCommon::Shader::Image& entry) { - std::lock_guard lock{mutex}; - const auto gpu_addr{tic.Address()}; - if (!gpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); - } - const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); - if (guard_samplers) { - sampled_textures.push_back(surface); - } - return view; - } + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); - bool TextureBarrier() { - const bool any_rt = - std::any_of(sampled_textures.begin(), sampled_textures.end(), - [](const auto& surface) { return 
surface->IsRenderTarget(); }); - sampled_textures.clear(); - return any_rt; - } + /// Invalidate the contents of the color buffer index + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateColorBuffer(size_t index); - TView GetDepthBufferSurface(bool preserve_contents) { - std::lock_guard lock{mutex}; - auto& dirty = maxwell3d.dirty; - if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { - return depth_buffer.view; - } - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; + /// Invalidate the contents of the depth buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateDepthBuffer(); - const auto& regs{maxwell3d.regs}; - const auto gpu_addr{regs.zeta.Address()}; - if (!gpu_addr || !regs.zeta_enable) { - SetEmptyDepthBuffer(); - return {}; - } - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyDepthBuffer(); - return {}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; - auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = surface_view.first; - depth_buffer.view = surface_view.second; - if (depth_buffer.target) - depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); - return surface_view.second; - } - - TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { - std::lock_guard lock{mutex}; - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { - return render_targets[index].view; - } - maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); - const auto& regs{maxwell3d.regs}; - 
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || - regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - SetEmptyColorBuffer(index); - return {}; - } + /// Return true when there are uncommitted images to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; - const auto& config{regs.rt[index]}; - const auto gpu_addr{config.Address()}; - if (!gpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - SetEmptyColorBuffer(index); - return {}; - } + /// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); - auto surface_view = - GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), - preserve_contents, true); - if (render_targets[index].target) { - auto& surface = render_targets[index].target; - surface->MarkAsRenderTarget(false, NO_RT); - const auto& cr_params = surface->GetSurfaceParams(); - if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - AsyncFlushSurface(surface); +private: + /// Iterate over all page indices in a range + template <typename Func> + static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_SHIFT; + for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); } } - render_targets[index].target = surface_view.first; - render_targets[index].view = surface_view.second; - if 
(render_targets[index].target) - render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); - return surface_view.second; } - void MarkColorBufferInUse(std::size_t index) { - if (auto& render_target = render_targets[index].target) { - render_target->MarkAsModified(true, Tick()); - } - } + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, + std::span<ImageViewId> image_view_ids); - void MarkDepthBufferInUse() { - if (depth_buffer.target) { - depth_buffer.target->MarkAsModified(true, Tick()); - } - } + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, u32 index); - void SetEmptyDepthBuffer() { - if (depth_buffer.target == nullptr) { - return; - } - depth_buffer.target->MarkAsRenderTarget(false, NO_RT); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; - } + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); - void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target == nullptr) { - return; - } - render_targets[index].target->MarkAsRenderTarget(false, NO_RT); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; - } - - void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Tegra::Engines::Fermi2D::Config& copy_config) { - std::lock_guard lock{mutex}; - SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); - SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - const GPUVAddr src_gpu_addr = src_config.Address(); - const GPUVAddr dst_gpu_addr = dst_config.Address(); - DeduceBestBlit(src_params, 
dst_params, src_gpu_addr, dst_gpu_addr); - - const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); - const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); - std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); - TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; - ImageBlit(src_surface, dst_surface.second, copy_config); - dst_surface.first->MarkAsModified(true, Tick()); - } - - TSurface TryFindFramebufferSurface(VAddr addr) const { - if (!addr) { - return nullptr; - } - const VAddr page = addr >> registry_page_bits; - const auto it = registry.find(page); - if (it == registry.end()) { - return nullptr; - } - const auto& list = it->second; - const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { - return surface->GetCpuAddr() == addr; - }); - return found != list.end() ? *found : nullptr; - } + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image); - u64 Tick() { - return ++ticks; - } + /// Upload data from guest to an image + template <typename MapBuffer> + void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); - void CommitAsyncFlushes() { - committed_flushes.push_back(uncommitted_flushes); - uncommitted_flushes.reset(); - } + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); - bool HasUncommittedFlushes() const { - return uncommitted_flushes != nullptr; - } + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); - bool ShouldWaitAsyncFlushes() const { - return !committed_flushes.empty() && committed_flushes.front() != nullptr; - } + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = 
RelaxedOptions{}); - void PopAsyncFlushes() { - if (committed_flushes.empty()) { - return; - } - auto& flush_list = committed_flushes.front(); - if (!flush_list) { - committed_flushes.pop_front(); - return; - } - for (TSurface& surface : *flush_list) { - FlushSurface(surface); - } - committed_flushes.pop_front(); - } + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); -protected: - explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - bool is_astc_supported_) - : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - gpu_memory{gpu_memory_} { - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - SetEmptyColorBuffer(i); - } + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); - SetEmptyDepthBuffer(); - staging_cache.SetSize(2); + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[static_cast<std::size_t>(a)] = b; - siblings_table[static_cast<std::size_t>(b)] = a; - }; - std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); - make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); - make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); - sampled_textures.reserve(64); - } 
+ /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); - ~TextureCache() = default; + /// Find or create an image view for the given color buffer index + [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); - virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); - virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, - const CopyParams& copy_params) = 0; + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); - virtual void ImageBlit(TView& src_view, TView& dst_view, - const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); - // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a separate buffer. 
- virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); - void ManageRenderTargetUnregister(TSurface& surface) { - auto& dirty = maxwell3d.dirty; - const u32 index = surface->GetRenderTarget(); - if (index == DEPTH_RT) { - dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; - } else { - dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; - } - dirty.flags[VideoCommon::Dirty::RenderTargets] = true; + /// Register image in the page table + void RegisterImage(ImageId image); + + /// Unregister image from the page table + void UnregisterImage(ImageId image); + + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image); + + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span<const ImageViewId> removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void 
BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; + DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; + std::vector<SamplerId> graphics_sampler_ids; + std::vector<ImageViewId> graphics_image_view_ids; + + DescriptorTable<TICEntry> compute_image_table{gpu_memory}; + DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; + std::vector<SamplerId> compute_sampler_ids; + std::vector<ImageViewId> compute_image_view_ids; + + RenderTargets render_targets; + + std::mutex mutex; + + std::unordered_map<TICEntry, ImageViewId> image_views; + std::unordered_map<TSCEntry, SamplerId> samplers; + std::unordered_map<RenderTargets, FramebufferId> framebuffers; + + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; + + bool has_deleted_images = false; + + SlotVector<Image> slot_images; + SlotVector<ImageView> slot_image_views; + SlotVector<ImageAlloc> slot_image_allocs; + SlotVector<Sampler> slot_samplers; + SlotVector<Framebuffer> slot_framebuffers; + + // TODO: This data structure is not optimal and it should be reworked + std::vector<ImageId> uncommitted_downloads; + std::queue<std::vector<ImageId>> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; + DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; + 
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; + + std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; +}; + +template <class P> +TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); +} + +template <class P> +void TextureCache<P>::TickFrame() { + // Tick sentenced resources in this order to ensure they are destroyed in the right order + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template <class P> +std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { + return std::unique_lock{mutex}; +} + +template <class P> +const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template <class P> +void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) 
{ + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template <class P> +void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template <class P> +typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { + [[unlikely]] if (index > graphics_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); } + return &slot_samplers[id]; +} - void Register(TSurface surface) { - const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const std::size_t size = surface->GetSizeInBytes(); - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", - gpu_addr); - return; - } - surface->SetCpuAddr(*cpu_addr); - RegisterInnerCache(surface); - surface->MarkAsRegistered(true); - surface->SetMemoryMarked(true); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); +template <class P> +typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { + [[unlikely]] if (index > compute_sampler_table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + [[unlikely]] if (is_new) { + id = FindSampler(descriptor); } + return &slot_samplers[id]; +} - void UnmarkMemory(TSurface surface) { - if (!surface->IsMemoryMarked()) { - return; - } - const std::size_t size = surface->GetSizeInBytes(); - const VAddr cpu_addr = surface->GetCpuAddr(); - 
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - surface->SetMemoryMarked(false); +template <class P> +void TextureCache<P>::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - void Unregister(TSurface surface) { - if (guard_render_targets && surface->IsProtected()) { - return; - } - if (!guard_render_targets && surface->IsRenderTarget()) { - ManageRenderTargetUnregister(surface); - } - UnmarkMemory(surface); - if (surface->IsSyncPending()) { - marked_for_unregister.remove(surface); - surface->SetSyncPending(false); - } - UnregisterInnerCache(surface); - surface->MarkAsRegistered(false); - ReserveSurface(surface->GetSurfaceParams(), surface); +template <class P> +void TextureCache<P>::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? 
tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) { - surface->SetGpuAddr(gpu_addr); - return surface; - } - // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(gpu_addr, params)}; - return new_surface; +template <class P> +void TextureCache<P>::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + return; } + flags[Dirty::RenderTargets] = false; - const bool is_astc_supported; + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; -private: - enum class RecycleStrategy : u32 { - Ignore = 0, - Flush = 1, - BufferCopy = 3, - }; + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && 
IsFullClear(depth_buffer_id)); - enum class DeductionType : u32 { - DeductionComplete, - DeductionIncomplete, - DeductionFailed, + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, }; +} - struct Deduction { - DeductionType type{DeductionType::DeductionFailed}; - TSurface surface{}; +template <class P> +typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} - bool Failed() const { - return type == DeductionType::DeductionFailed; - } +template <class P> +void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, + std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} - bool Incomplete() const { - return type == DeductionType::DeductionIncomplete; - } +template <class P> +ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} - bool IsDepth() const { - return surface->GetSurfaceParams().IsPixelFormatZeta(); - } - }; +template <class P> +FramebufferId 
TextureCache<P>::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array<ImageView*, NUM_RT> color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} - /** - * Takes care of selecting a proper strategy to deal with a texture recycle. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param untopological Indicates to the recycler that the texture has no way - * to match the overlaps due to topological reasons. - **/ - RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { - if (Settings::IsGPULevelExtreme()) { - return RecycleStrategy::Flush; - } - // 3D Textures decision - if (params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - for (const auto& s : overlaps) { - const auto& s_params = s->GetSurfaceParams(); - if (s_params.target == SurfaceTarget::Texture3D) { - return RecycleStrategy::Flush; - } - } - // Untopological decision - if (untopological == MatchTopologyResult::CompressUnmatch) { - return RecycleStrategy::Flush; - } - if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { - return RecycleStrategy::Flush; - } - return RecycleStrategy::Ignore; - } - - /** - * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented - * strategies: Ignore and Flush. 
- * - * - Ignore: Just unregisters all the overlaps and loads the new texture. - * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters for the new surface. - * @param gpu_addr The starting address of the new surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or left - * blank. - * @param untopological Indicates to the recycler that the texture has no way to match the - * overlaps due to topological reasons. - **/ - std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool preserve_contents, - const MatchTopologyResult untopological) { - const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); - for (auto& surface : overlaps) { - Unregister(surface); - } - switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { - case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, do_load); - } - case RecycleStrategy::Flush: { - std::sort(overlaps.begin(), overlaps.end(), - [](const TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); - for (auto& surface : overlaps) { - FlushSurface(surface); - } - return InitializeSurface(gpu_addr, params, preserve_contents); +template <class P> +void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; } - case RecycleStrategy::BufferCopy: { - auto new_surface = GetUncachedSurface(gpu_addr, params); - BufferCopy(overlaps[0], new_surface); - return {new_surface, new_surface->GetMainView()}; + image.flags |= ImageFlagBits::CpuModified; + UntrackImage(image); + }); +} + +template <class P> +void 
TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector<ImageId> images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + // Skip images that were not modified from the GPU + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; } - default: { - UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, do_load); + // Skip images that .are. modified from the CPU + // We don't want to write sensitive data from the guest + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; } + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return; } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, 0, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); } +} - /** - * Takes a single surface and recreates into another that may differ in - * format, target or width alignment. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we'll use to recreate the surface. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, - bool is_render) { - const auto gpu_addr = current_surface->GetGpuAddr(); - const auto& cr_params = current_surface->GetSurfaceParams(); - TSurface new_surface; - if (cr_params.pixel_format != params.pixel_format && !is_render && - GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { - SurfaceParams new_params = params; - new_params.pixel_format = cr_params.pixel_format; - new_params.type = cr_params.type; - new_surface = GetUncachedSurface(gpu_addr, new_params); - } else { - new_surface = GetUncachedSurface(gpu_addr, params); - } - const SurfaceParams& final_params = new_surface->GetSurfaceParams(); - if (cr_params.type != final_params.type) { - if (Settings::IsGPULevelExtreme()) { - BufferCopy(current_surface, new_surface); - } - } else { - std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); - for (auto& brick : bricks) { - TryCopyImage(current_surface, new_surface, brick); - } - } - Unregister(current_surface); - Register(new_surface); - new_surface->MarkAsModified(current_surface->IsModified(), Tick()); - return {new_surface, new_surface->GetMainView()}; - } - - /** - * Takes a single surface and checks with the new surface's params if it's an exact - * match, we return the main view of the registered surface. If its formats don't - * match, we rebuild the surface. We call this last method a `Mirage`. If formats - * match but the targets don't, we create an overview View of the registered surface. - * - * @param current_surface The registered surface in the cache which we want to convert. - * @param params The new surface params which we want to check. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params, bool is_render) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = [&]() -> std::pair<TSurface, TView> { - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; - }; - if (!is_mirage) { - return match_check(); - } - if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); - } - - /** - * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate - * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps - * of the new surface, if they all match we end up recreating a surface for them, - * else we return nothing. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. 
- **/ - std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr) { - if (params.target == SurfaceTarget::Texture3D) { - return std::nullopt; - } - const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; - TSurface new_surface = GetUncachedSurface(gpu_addr, params); +template <class P> +void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image); + } + UnregisterImage(id); + DeleteImage(id); + } +} - if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { - LoadSurface(new_surface); - for (const auto& surface : overlaps) { - Unregister(surface); - } - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } +template <class P> +void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [src_samples_x, src_samples_y] = 
SamplesLog2(src_image.info.num_samples); + const std::array src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - std::size_t passed_tests = 0; - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; - if (!mipmap_layer) { - continue; - } - const auto [base_layer, base_mipmap] = *mipmap_layer; - if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { - continue; - } - ++passed_tests; - - // Copy all mipmaps and layers - const u32 block_width = params.GetDefaultBlockWidth(); - const u32 block_height = params.GetDefaultBlockHeight(); - for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { - const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); - const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); - if (width < block_width || height < block_height) { - // Current APIs forbid copying small compressed textures, avoid errors - break; - } - const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, - src_params.depth); - TryCopyImage(surface, new_surface, copy_params); - } - } - if (passed_tests == 0) { - return std::nullopt; - } - if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { - // In Accurate GPU all tests should pass, else we recycle - return std::nullopt; - } + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [dst_samples_x, dst_samples_y] = 
SamplesLog2(dst_image.info.num_samples); + const std::array dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; - const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); - for (const auto& surface : overlaps) { - Unregister(surface); - } + // Always call this after src_framebuffer_id was queried, as the address might be invalidated. + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - return {{new_surface, new_surface->GetMainView()}}; - } - - /** - * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D - * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of - * the HLE methods. - * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param cpu_addr The starting address of the new surface on physical memory. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. 
- */ - std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, - const SurfaceParams& params, - GPUVAddr gpu_addr, VAddr cpu_addr, - bool preserve_contents) { - if (params.target != SurfaceTarget::Texture3D) { - for (const auto& surface : overlaps) { - if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { - if (Settings::IsGPULevelExtreme()) { - return std::nullopt; - } - Unregister(surface); - return InitializeSurface(gpu_addr, params, preserve_contents); - } - return std::nullopt; - } - if (surface->GetCpuAddr() != cpu_addr) { - continue; - } - if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { - return std::make_pair(surface, surface->GetMainView()); - } - } - return InitializeSurface(gpu_addr, params, preserve_contents); - } +template <class P> +void TextureCache<P>::InvalidateColorBuffer(size_t index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + color_buffer_id = FindColorBuffer(index, false); + if (!color_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); + return; + } + // When invalidating a color buffer, the old contents are no longer relevant + ImageView& color_buffer = slot_image_views[color_buffer_id]; + Image& image = slot_images[color_buffer.image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; - if (params.num_levels > 1) { - // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach - return std::nullopt; - } + runtime.InvalidateColorBuffer(color_buffer, index); +} - if (overlaps.size() == 1) { - const auto& surface = overlaps[0]; - const SurfaceParams& overlap_params = surface->GetSurfaceParams(); - // Don't attempt to render to textures with more than one level for now - // The texture has to be to the right or the sample address if we want to render to it - if (overlap_params.num_levels == 1 && 
cpu_addr >= surface->GetCpuAddr()) { - const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - if (slice < overlap_params.depth) { - auto view = surface->Emplace3DView(slice, params.depth, 0, 1); - return std::make_pair(std::move(surface), std::move(view)); - } - } - } +template <class P> +void TextureCache<P>::InvalidateDepthBuffer() { + ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; + depth_buffer_id = FindDepthBuffer(false); + if (!depth_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); + return; + } + // When invalidating the depth buffer, the old contents are no longer relevant + ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; + image.flags &= ~ImageFlagBits::CpuModified; + image.flags &= ~ImageFlagBits::GpuModified; - TSurface new_surface = GetUncachedSurface(gpu_addr, params); - bool modified = false; + ImageView& depth_buffer = slot_image_views[depth_buffer_id]; + runtime.InvalidateDepthBuffer(depth_buffer); +} - for (auto& surface : overlaps) { - const SurfaceParams& src_params = surface->GetSurfaceParams(); - if (src_params.target != SurfaceTarget::Texture2D || - src_params.height != params.height || - src_params.block_depth != params.block_depth || - src_params.block_height != params.block_height) { - return std::nullopt; - } - modified |= surface->IsModified(); - - const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); - const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); - const u32 width = params.width; - const u32 height = params.height; - const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); - TryCopyImage(surface, new_surface, copy_params); +template <class P> +typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_SHIFT); + if (it == 
page_table.end()) { + return nullptr; + } + const auto& image_ids = it->second; + for (const ImageId image_id : image_ids) { + const ImageBase& image = slot_images[image_id]; + if (image.cpu_addr != cpu_addr) { + continue; } - for (const auto& surface : overlaps) { - Unregister(surface); + if (image.image_view_ids.empty()) { + continue; } - new_surface->MarkAsModified(modified, Tick()); - Register(new_surface); - - TView view = new_surface->GetMainView(); - return std::make_pair(std::move(new_surface), std::move(view)); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps: - * - * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * - * 2. Check if there are any overlaps at all, if there are none, we just load the texture from - * memory else we move to step 3. - * - * 3. Consists of figuring out the relationship between the candidate texture and the - * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If - * there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we - * have to check if the candidate is a view (layer/mipmap) of the overlap or if the - * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct - * a new surface. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. - * @param preserve_contents Indicates that the new surface should be loaded from memory or - * left blank. - * @param is_render Whether or not the surface is a render target. 
- **/ - std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, - const SurfaceParams& params, bool preserve_contents, - bool is_render) { - // Step 1 - // Check Level 1 Cache for a fast structural match. If candidate surface - // matches at certain level we are pretty much done. - if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - VectorSurface overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None) { - const auto& old_params = current_surface->GetSurfaceParams(); - const bool not_3d = params.target != SurfaceTarget::Texture3D && - old_params.target != SurfaceTarget::Texture3D; - if (not_3d || current_surface->MatchTarget(params.target)) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); - } - } - } - } +template <class P> +bool TextureCache<P>::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} - // Step 2 - // Obtain all possible overlaps in the memory region - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; +template <class P> +bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} - // If none are found, we are done. we just load the surface and create it. 
- if (overlaps.empty()) { - return InitializeSurface(gpu_addr, params, preserve_contents); - } +template <class P> +void TextureCache<P>::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} - // Step 3 - // Now we need to figure the relationship between the texture and its overlaps - // we do a topological test to ensure we can find some relationship. If it fails - // immediately recycle the texture - for (const auto& surface : overlaps) { - const auto topological_result = surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - topological_result); - } - } +template <class P> +void TextureCache<P>::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span<const ImageId> download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.MapDownloadBuffer(total_size_bytes); + size_t buffer_offset = 0; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, buffer_offset, copies); + buffer_offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + buffer_offset = 0; + const std::span<u8> download_span = download_map.Span(); + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + const std::span<u8> image_download_span = download_span.subspan(buffer_offset); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, 
image_download_span); + buffer_offset += image.unswizzled_size_bytes; + } + committed_downloads.pop(); +} - // Manage 3D textures - if (params.block_depth > 0) { - auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); - if (surface) { - return *surface; - } +template <class P> +bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; } + is_modified = true; + return true; + }); + return is_modified; +} - // Split cases between 1 overlap or many. - if (overlaps.size() == 1) { - TSurface current_surface = overlaps[0]; - // First check if the surface is within the overlap. If not, it means - // two things either the candidate surface is a supertexture of the overlap - // or they don't match in any known way. - if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - // Now we check if the candidate is a mipmap/layer of the overlap - std::optional<TView> view = - current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view) { - const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); - if (is_mirage) { - // On a mirage view, we need to recreate the surface under this new view - // and then obtain a view again. 
- SurfaceParams new_params = current_surface->GetSurfaceParams(); - const u32 wh = SurfaceParams::ConvertWidth( - new_params.width, new_params.pixel_format, params.pixel_format); - const u32 hh = SurfaceParams::ConvertHeight( - new_params.height, new_params.pixel_format, params.pixel_format); - new_params.width = wh; - new_params.height = hh; - new_params.pixel_format = params.pixel_format; - std::pair<TSurface, TView> pair = - RebuildSurface(current_surface, new_params, is_render); - std::optional<TView> mirage_view = - pair.first->EmplaceView(params, gpu_addr, candidate_size); - if (mirage_view) - return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - return {current_surface, *view}; - } - } else { - // If there are many overlaps, odds are they are subtextures of the candidate - // surface. We try to construct a new surface based on the candidate parameters, - // using the overlaps. If a single overlap fails, this will fail. - std::optional<std::pair<TSurface, TView>> view = - TryReconstructSurface(overlaps, params, gpu_addr); - if (view) { - return *view; - } - } - // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } - - /** - * Gets the starting address and parameters of a candidate surface and tries to find a - * matching surface within the cache that's similar to it. If there are many textures - * or the texture found if entirely incompatible, it will fail. If no texture is found, the - * blit will be unsuccessful. - * - * @param gpu_addr The starting address of the candidate surface. - * @param params The parameters on the candidate surface. 
- **/ - Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - - if (!cpu_addr) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } +template <class P> +void TextureCache<P>::RefreshContents(Image& image) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image); - if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { - TSurface& current_surface = iter->second; - const auto topological_result = current_surface->MatchesTopology(params); - if (topological_result != MatchTopologyResult::FullMatch) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } - const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - current_surface->MatchTarget(params.target)) { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = current_surface; - return result; - } - } + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); + UploadImageContents(image, map, 0); + runtime.InsertUploadMemoryBarrier(); +} - const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; +template <class P> +template <typename MapBuffer> +void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { + const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads 
= FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(map, buffer_offset, copies); + } else if (image.info.type == ImageType::Buffer) { + const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; + image.UploadMemory(map, buffer_offset, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(map, buffer_offset, copies); + } +} - if (overlaps.empty()) { - Deduction result{}; - result.type = DeductionType::DeductionIncomplete; - return result; - } +template <class P> +ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { + if (!IsValidAddress(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} - if (overlaps.size() > 1) { - Deduction result{}; - result.type = DeductionType::DeductionFailed; - return result; - } else { - Deduction result{}; - result.type = DeductionType::DeductionComplete; - result.surface = overlaps[0]; - return result; - } +template <class P> +ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const 
ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} - /** - * Gets a null surface based on a target texture. - * @param target The target of the null surface. - */ - TView GetNullSurface(SurfaceTarget target) { - const u32 i_target = static_cast<u32>(target); - if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { - return it->second->GetMainView(); - } - SurfaceParams params{}; - params.target = target; - params.is_tiled = false; - params.srgb_conversion = false; - params.is_layered = - target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || - target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; - params.block_width = 0; - params.block_height = 0; - params.block_depth = 0; - params.tile_width_spacing = 1; - params.width = 1; - params.height = 1; - params.depth = 1; - if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { - params.depth = 6; - } - params.pitch = 4; - params.num_levels = 1; - params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; - params.type = VideoCore::Surface::SurfaceType::ColorTexture; - auto surface = CreateSurface(0ULL, params); - invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); - surface->UploadTexture(invalid_memory); - surface->MarkAsModified(false, Tick()); - invalid_cache.emplace(i_target, surface); - return surface->GetMainView(); - } - - /** - * Gets the a source and destination starting address and parameters, - * and tries to deduce if they are supposed to be depth textures. If so, their - * parameters are modified and fixed into so. - * - * @param src_params The parameters of the candidate surface. 
- * @param dst_params The parameters of the destination surface. - * @param src_gpu_addr The starting address of the candidate surface. - * @param dst_gpu_addr The starting address of the destination surface. - **/ - void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, - const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { - auto deduced_src = DeduceSurface(src_gpu_addr, src_params); - auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); - if (deduced_src.Failed() || deduced_dst.Failed()) { - return; +template <class P> +ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template <class P> +ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + return ImageId{}; + } + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options)) { + image_id = existing_image_id; + return true; } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} - const bool incomplete_src = 
deduced_src.Incomplete(); - const bool incomplete_dst = deduced_dst.Incomplete(); +template <class P> +ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} - if (incomplete_src && incomplete_dst) { +template <class P> +ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + std::vector<ImageId> overlap_ids; + std::vector<ImageId> left_aliased_ids; + std::vector<ImageId> right_aliased_ids; + ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { + if (info.type != overlap.info.type) { return; } - - const bool any_incomplete = incomplete_src || incomplete_dst; - - if (!any_incomplete) { - if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { - return; - } - } else { - if (incomplete_src && !(deduced_dst.IsDepth())) { - return; - } - - if (incomplete_dst && !(deduced_src.IsDepth())) { - return; + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); } + return; + } + const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = 
solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options)) { + left_aliased_ids.push_back(overlap_id); + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + right_aliased_ids.push_back(overlap_id); } + }); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; - const auto inherit_format = [](SurfaceParams& to, TSurface from) { - const SurfaceParams& params = from->GetSurfaceParams(); - to.pixel_format = params.pixel_format; - to.type = params.type; - }; - // Now we got the cases where one or both is Depth and the other is not known - if (!incomplete_src) { - inherit_format(src_params, deduced_src.surface); + // TODO: Only upload what we need + RefreshContents(new_image); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { - inherit_format(src_params, deduced_dst.surface); + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); } - if (!incomplete_dst) { - inherit_format(dst_params, deduced_dst.surface); - } else { - inherit_format(dst_params, deduced_src.surface); + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap); } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; 
+ AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); } + RegisterImage(new_image_id); + return new_image_id; +} - std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { - auto new_surface{GetUncachedSurface(gpu_addr, params)}; - Register(new_surface); - if (preserve_contents) { - LoadSurface(new_surface); - } - return {new_surface, new_surface->GetMainView()}; +template <class P> +typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? 
&slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template <class P> +SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); } + return pair->second; +} - void LoadSurface(const TSurface& surface) { - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(gpu_memory, staging_cache); - surface->UploadTexture(staging_cache.GetBuffer(0)); - surface->MarkAsModified(false, Tick()); +template <class P> +ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - void FlushSurface(const TSurface& surface) { - if (!surface->IsModified()) { - return; - } - staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(gpu_memory, staging_cache); - surface->MarkAsModified(false, Tick()); - } - - void RegisterInnerCache(TSurface& 
surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache[cpu_addr] = surface; - while (start <= end) { - registry[start].push_back(surface); - start++; - } +template <class P> +ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - void UnregisterInnerCache(TSurface& surface) { - const VAddr cpu_addr = surface->GetCpuAddr(); - VAddr start = cpu_addr >> registry_page_bits; - const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cpu_addr); - while (start <= end) { - auto& reg{registry[start]}; - reg.erase(std::find(reg.begin(), reg.end(), surface)); - start++; - } +template <class P> +ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); } + const s32 layers = image.info.type == ImageType::e3D ? 
info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} - VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { - if (size == 0) { - return {}; +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 32> images; + ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } } - const VAddr cpu_addr_end = cpu_addr + size; - const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - VectorSurface surfaces; - for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { - const auto it = registry.find(start); - if (it == registry.end()) { + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { continue; } - for (auto& surface : it->second) { - if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { - continue; + if (!image.Overlaps(cpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; } - surface->MarkAsPicked(true); - surfaces.push_back(surface); + } else { + func(image_id, image); } } - for (auto& surface : surfaces) { - surface->MarkAsPicked(false); + if constexpr (BOOL_BREAK) { + return false; } - return surfaces; + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= 
~ImageFlagBits::Picked; } +} - void ReserveSurface(const SurfaceParams& params, TSurface surface) { - surface_reserve[params].push_back(std::move(surface)); +template <class P> +ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template <class P> +void TextureCache<P>::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { page_table[page].push_back(image_id); }); +} - TSurface TryGetReservedSurface(const SurfaceParams& params) { - auto search{surface_reserve.find(params)}; - if (search == surface_reserve.end()) { - return {}; +template <class P> +void TextureCache<P>::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT); + return; } - for (auto& surface : search->second) { - if (!surface->IsRegistered()) { - return surface; - } + std::vector<ImageId>& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image 
in page=0x{:x}", page << PAGE_SHIFT); + return; } - return {}; - } + image_ids.erase(vector_it); + }); +} - /// Try to do an image copy logging when formats are incompatible. - void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { - const SurfaceParams& src_params = src->GetSurfaceParams(); - const SurfaceParams& dst_params = dst->GetSurfaceParams(); - if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { - LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", - static_cast<int>(dst_params.pixel_format), - static_cast<int>(src_params.pixel_format)); - return; +template <class P> +void TextureCache<P>::TrackImage(ImageBase& image) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +} + +template <class P> +void TextureCache<P>::UntrackImage(ImageBase& image) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +} + +template <class P> +void TextureCache<P>::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), 
"Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span<const ImageViewId> image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; } - ImageCopy(src, dst, copy); } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); - constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { - return siblings_table[static_cast<std::size_t>(format)]; + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} - /// Returns true the shader sampler entry is compatible with the TIC texture type. 
- static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, - const VideoCommon::Shader::Sampler& entry) { - const auto shader_type = entry.type; - switch (tic_type) { - case Tegra::Texture::TextureType::Texture1D: - case Tegra::Texture::TextureType::Texture1DArray: - return shader_type == Tegra::Shader::TextureType::Texture1D; - case Tegra::Texture::TextureType::Texture1DBuffer: - // TODO(Rodrigo): Assume as valid for now - return true; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return shader_type == Tegra::Shader::TextureType::Texture2D; - case Tegra::Texture::TextureType::Texture2DArray: - return shader_type == Tegra::Shader::TextureType::Texture2D || - shader_type == Tegra::Shader::TextureType::TextureCube; - case Tegra::Texture::TextureType::Texture3D: - return shader_type == Tegra::Shader::TextureType::Texture3D; - case Tegra::Texture::TextureType::TextureCubeArray: - case Tegra::Texture::TextureType::TextureCubemap: - if (shader_type == Tegra::Shader::TextureType::TextureCube) { - return true; - } - return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; +template <class P> +void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; } - UNREACHABLE(); - return true; } +} - struct FramebufferTargetInfo { - TSurface target; - TView view; - }; - - void AsyncFlushSurface(TSurface& surface) { - if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared<std::list<TSurface>>(); +template <class P> +void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = 
framebuffers.erase(it); + } else { + ++it; } - uncommitted_flushes->push_back(surface); } +} - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::MemoryManager& gpu_memory; - - FormatLookupTable format_lookup_table; - FormatCompatibility format_compatibility; - - u64 ticks{}; - - // Guards the cache for protection conflicts. - bool guard_render_targets{}; - bool guard_samplers{}; - - // The siblings table is for formats that can inter exchange with one another - // without causing issues. This is only valid when a conflict occurs on a non - // rendering use. - std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; - - // The internal Cache is different for the Texture Cache. It's based on buckets - // of 1MB. This fits better for the purpose of this cache as textures are normaly - // large in size. - static constexpr u64 registry_page_bits{20}; - static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map<VAddr, std::vector<TSurface>> registry; +template <class P> +void TextureCache<P>::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} - static constexpr u32 DEPTH_RT = 8; - static constexpr u32 NO_RT = 0xFFFFFFFF; +template <class P> +void TextureCache<P>::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector<const AliasedImage*, 1> aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + 
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} - // The L1 Cache is used for fast texture lookup before checking the overlaps - // This avoids calculating size and other stuffs. - std::unordered_map<VAddr, TSurface> l1_cache; +template <class P> +void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image); + } + } else { + RefreshContents(image); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. 
- std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; - std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> - render_targets; - FramebufferTargetInfo depth_buffer; +template <class P> +void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + PrepareImage(image_view.image_id, is_modification, invalidate); +} - std::vector<TSurface> sampled_textures; +template <class P> +void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = 
src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); - /// This cache stores null surfaces in order to be used as a placeholder - /// for invalid texture calls. - std::unordered_map<u32, TSurface> invalid_cache; - std::vector<u8> invalid_memory; + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} - std::list<TSurface> marked_for_unregister; +template <class P> +void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} - std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; - std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; +template <class P> +std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = 
GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} - StagingCache staging_cache; - std::recursive_mutex mutex; -}; +template <class P> +bool TextureCache<P>::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h @@ -0,0 +1,140 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/texture_cache/slot_vector.h" + +namespace VideoCommon { + +constexpr size_t NUM_RT = 8; +constexpr size_t MAX_MIP_LEVELS = 14; + +constexpr SlotId CORRUPT_ID{0xfffffffe}; + +using ImageId = SlotId; +using ImageViewId = SlotId; +using ImageAllocId = SlotId; +using SamplerId = SlotId; +using FramebufferId = SlotId; + +enum class ImageType : u32 { + e1D, + e2D, + e3D, + Linear, + Buffer, +}; + +enum class ImageViewType : u32 { + e1D, + e2D, + Cube, + e3D, + e1DArray, + e2DArray, + CubeArray, + Rect, + Buffer, +}; +constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; + +enum class RelaxedOptions : u32 { + Size = 1 << 0, + Format = 1 << 1, + Samples = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) + +struct Offset2D { + constexpr auto operator<=>(const Offset2D&) const noexcept = default; + + s32 x; + s32 y; +}; + +struct Offset3D { + constexpr auto operator<=>(const Offset3D&) const noexcept = default; + + s32 x; + s32 y; + s32 z; +}; + +struct Extent2D { + constexpr auto operator<=>(const Extent2D&) const noexcept = default; + + u32 width; + u32 height; +}; + +struct Extent3D { + constexpr auto operator<=>(const Extent3D&) const noexcept = default; + + u32 width; + u32 height; + u32 depth; +}; + +struct SubresourceLayers { + s32 base_level = 0; + s32 base_layer = 0; + s32 num_layers = 1; +}; + +struct SubresourceBase { + constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; + + s32 level = 0; + s32 layer = 0; +}; + +struct SubresourceExtent { + constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; + + s32 levels = 1; + s32 layers = 1; +}; + +struct SubresourceRange { + constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; + + SubresourceBase base; + SubresourceExtent extent; +}; + +struct ImageCopy { + SubresourceLayers src_subresource; + SubresourceLayers dst_subresource; + Offset3D 
src_offset; + Offset3D dst_offset; + Extent3D extent; +}; + +struct BufferImageCopy { + size_t buffer_offset; + size_t buffer_size; + u32 buffer_row_length; + u32 buffer_image_height; + SubresourceLayers image_subresource; + Offset3D image_offset; + Extent3D image_extent; +}; + +struct BufferCopy { + size_t src_offset; + size_t dst_offset; + size_t size; +}; + +struct SwizzleParameters { + Extent3D num_tiles; + Extent3D block; + size_t buffer_offset; + s32 level; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..9ed1fc007 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp @@ -0,0 +1,1232 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This files contains code from Ryujinx +// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx +// The sections using code from Ryujinx are marked with a link to the original version + +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include <algorithm> +#include <array> +#include <numeric> +#include <optional> +#include <span> +#include <vector> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "video_core/compatible_formats.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" +#include "video_core/textures/decoders.h" + +namespace VideoCommon { + +namespace { + +using Tegra::Texture::GOB_SIZE; +using Tegra::Texture::GOB_SIZE_SHIFT; +using Tegra::Texture::GOB_SIZE_X; +using Tegra::Texture::GOB_SIZE_X_SHIFT; +using Tegra::Texture::GOB_SIZE_Y; +using Tegra::Texture::GOB_SIZE_Y_SHIFT; +using Tegra::Texture::GOB_SIZE_Z; +using Tegra::Texture::GOB_SIZE_Z_SHIFT; +using Tegra::Texture::MsaaMode; +using Tegra::Texture::SwizzleTexture; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::UnswizzleTexture; +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::DefaultBlockHeight; +using VideoCore::Surface::DefaultBlockWidth; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsViewCompatible; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; 
+using VideoCore::Surface::SurfaceType; + +constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); + +struct LevelInfo { + Extent3D size; + Extent3D block; + Extent2D tile_size; + u32 bpp_log2; + u32 tile_width_spacing; +}; + +[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { + if (shift == 0) { + return 0; + } + u32 x = unit_factor << (shift - 1); + if (x >= dimension) { + while (--shift) { + x >>= 1; + if (x < dimension) { + break; + } + } + } + return shift; +} + +[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { + return std::max<u32>(size >> level, 1); +} + +[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { + return Extent3D{ + .width = AdjustMipSize(size.width, level), + .height = AdjustMipSize(size.height, level), + .depth = AdjustMipSize(size.depth, level), + }; +} + +[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + return Extent3D{ + .width = size.width >> samples_x, + .height = size.height >> samples_y, + .depth = size.depth, + }; +} + +template <u32 GOB_EXTENT> +[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { + do { + while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { + --block_size; + } + } while (level--); + return block_size; +} + +[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, + u32 level) { + return { + .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), + .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), + .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { + return { + .width = Common::DivCeil(size.width, tile_size.width), + .height = 
Common::DivCeil(size.height, tile_size.height), + .depth = size.depth, + }; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) { + return std::countl_zero(bytes_per_block) ^ 0x1F; +} + +[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { + return BytesPerBlockLog2(BytesPerBlock(format)); +} + +[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { + const Extent3D num_blocks = AdjustTileSize(size, tile_size); + return num_blocks.width * num_blocks.height * num_blocks.depth; +} + +[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { + return Common::DivCeil(AdjustMipSize(size, level), block_size); +} + +[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { + return config.Width() * config.Height() * BytesPerBlock(format); +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { + switch (type) { + case TextureType::Texture2D: + case TextureType::Texture2DArray: + case TextureType::Texture2DNoMipmap: + case TextureType::Texture3D: + case TextureType::TextureCubeArray: + case TextureType::TextureCubemap: + return true; + case TextureType::Texture1D: + case TextureType::Texture1DArray: + case TextureType::Texture1DBuffer: + return false; + } + return false; +} + +[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { + switch (type) { + case ImageType::e2D: + case ImageType::e3D: + case ImageType::Linear: + return true; + case ImageType::e1D: + case ImageType::Buffer: + return false; + } + UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); +} + +[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { + switch (num_samples) { + case 1: + return {1, 1}; + case 2: + return {2, 1}; + case 4: + return {2, 2}; + case 8: + return {4, 2}; + case 16: + return {4, 4}; + } + UNREACHABLE_MSG("Invalid number of samples={}", num_samples); + return {1, 1}; +} + +[[nodiscard]] constexpr Extent2D 
DefaultBlockSize(PixelFormat format) { + return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; +} + +[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) { + return Extent3D{ + .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, + .height = AdjustSize(info.size.height, level, info.tile_size.height), + .depth = AdjustMipSize(info.size.depth, level), + }; +} + +[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + return Extent3D{ + .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), + .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), + .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), + }; +} + +[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { + return Extent2D{ + .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, + .height = GOB_SIZE_Y_SHIFT + block_height, + }; +} + +[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, + u32 block_depth) { + return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || + num_tiles.depth < (1U << block_depth); +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, + u32 bpp_log2) { + if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { + return GOB_SIZE_X_SHIFT - bpp_log2; + } else { + return gob.width; + } +} + +[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, + u32 tile_width_spacing) { + const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); + return StrideAlignment(num_tiles, block, gob, bpp_log2); +} + +[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent2D gobs{ + .width = 
Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), + .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), + }; + const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing); + const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); + const u32 alignment = is_small ? 0 : info.tile_width_spacing; + return Extent2D{ + .width = Common::AlignBits(gobs.width, alignment), + .height = gobs.height, + }; +} + +[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { + const Extent3D blocks = NumLevelBlocks(info, level); + const Extent3D tile_shift = TileShift(info, level); + const Extent2D gobs = NumGobs(info, level); + return Extent3D{ + .width = Common::DivCeilLog2(gobs.width, tile_shift.width), + .height = Common::DivCeilLog2(gobs.height, tile_shift.height), + .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), + }; +} + +[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { + const Extent3D tile_shift = TileShift(info, level); + const Extent3D tiles = LevelTiles(info, level); + const u32 num_tiles = tiles.width * tiles.height * tiles.depth; + const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; + return num_tiles << shift; +} + +[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info, + u32 num_levels) { + ASSERT(num_levels <= MAX_MIP_LEVELS); + std::array<u32, MAX_MIP_LEVELS> sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(info, level); + } + return sizes; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing) { + const auto [samples_x, samples_y] = Samples(num_samples); + const u32 bytes_per_block = BytesPerBlock(format); + return { + .size = + { + .width = size.width * samples_x, + .height = size.height * samples_y, + .depth = 
size.depth, + }, + .block = block, + .tile_size = DefaultBlockSize(format), + .bpp_log2 = BytesPerBlockLog2(bytes_per_block), + .tile_width_spacing = tile_width_spacing, + }; +} + +[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { + return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing); +} + +[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, + u32 num_samples, u32 tile_width_spacing, + u32 level) { + const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); + u32 offset = 0; + for (u32 current_level = 0; current_level < level; ++current_level) { + offset += CalculateLevelSize(info, current_level); + } + return offset; +} + +[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, + u32 tile_size_y, u32 tile_width_spacing) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 + if (tile_width_spacing > 0) { + const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; + return Common::AlignBits(size_bytes, alignment_log2); + } + const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); + while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { + --block.height; + } + while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { + --block.depth; + } + const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; + const u32 num_blocks = size_bytes >> block_shift; + if (size_bytes != num_blocks << block_shift) { + return (num_blocks + 1) << block_shift; + } + return size_bytes; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info, + const ImageBase& overlap, + bool strict_size) { + const ImageInfo& info = overlap.info; + if 
(!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != info.block) { + return std::nullopt; + } + const SubresourceExtent resources = new_info.resources; + return SubresourceExtent{ + .levels = std::max(resources.levels, info.resources.levels), + .layers = std::max(resources.layers, info.resources.layers), + }; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); + const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); + const auto it = std::ranges::find(slice_offsets, diff); + if (it == slice_offsets.end()) { + return std::nullopt; + } + const std::vector subresources = CalculateSliceSubresources(new_info); + const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + const u32 mip_depth = std::max(1U, new_info.size.depth << base.level); + if (mip_depth < info.size.depth + base.layer) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = 1, + }; +} + +[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( + const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { + const u32 layer_stride = new_info.layer_stride; + const s32 new_size = layer_stride * new_info.resources.layers; + const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); + if (diff > new_size) { + return std::nullopt; + } + const s32 base_layer = diff / layer_stride; + const s32 mip_offset = diff % layer_stride; + 
const std::array offsets = CalculateMipLevelOffsets(new_info); + const auto end = offsets.begin() + new_info.resources.levels; + const auto it = std::find(offsets.begin(), end, mip_offset); + if (it == end) { + // Mipmap is not aligned to any valid size + return std::nullopt; + } + const SubresourceBase base{ + .level = static_cast<s32>(std::distance(offsets.begin(), it)), + .layer = base_layer, + }; + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { + return std::nullopt; + } + if (MipBlockSize(new_info, base.level) != info.block) { + return std::nullopt; + } + return SubresourceExtent{ + .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), + }; +} + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + std::optional<SubresourceExtent> resources; + if (new_info.type != ImageType::e3D) { + resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); + } else { + resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); + } + if (!resources) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *resources, + }; +} + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info, + GPUVAddr gpu_addr, + VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size) { + const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& info = overlap.info; + if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) { + return std::nullopt; + } + if (new_info.block != MipBlockSize(info, base->level)) { + return std::nullopt; 
+ } + const SubresourceExtent resources = new_info.resources; + s32 layers = 1; + if (info.type != ImageType::e3D) { + layers = std::max(resources.layers, info.resources.layers + base->layer); + } + return OverlapResult{ + .gpu_addr = overlap.gpu_addr, + .cpu_addr = overlap.cpu_addr, + .resources = + { + .levels = std::max(resources.levels + base->level, info.resources.levels), + .layers = layers, + }, + }; +} + +[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 + static constexpr u32 STRIDE_ALIGNMENT = 32; + ASSERT(info.type == ImageType::Linear); + const Extent2D num_tiles{ + .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), + }; + const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); + return Extent2D{ + .width = Common::AlignUp(num_tiles.width, width_alignment), + .height = num_tiles.height, + }; +} + +[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) { + // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176 + ASSERT(info.type != ImageType::Linear); + const Extent3D size = AdjustMipSize(info.size, level); + const Extent3D num_tiles{ + .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)), + .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)), + .depth = size.depth, + }; + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); + const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); + return Extent3D{ + .width = Common::AlignBits(num_tiles.width, alignment), + .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + 
mip_block.height), + .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), + }; +} + +[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { + u32 num_blocks = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D mip_size = AdjustMipSize(info.size, level); + num_blocks += NumBlocks(mip_size, tile_size); + } + return num_blocks; +} + +[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { + ASSERT(info.type == ImageType::e3D); + u32 num_slices = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + num_slices += AdjustMipSize(info.size.depth, level); + } + return num_slices; +} + +void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span<const u8> memory) { + ASSERT(copy.image_offset.z == 0); + ASSERT(copy.image_extent.depth == 1); + ASSERT(copy.image_subresource.base_level == 0); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_subresource.num_layers == 1); + + const u32 bytes_per_block = BytesPerBlock(info.format); + const u32 row_length = copy.image_extent.width * bytes_per_block; + const u32 guest_offset_x = copy.image_offset.x * bytes_per_block; + + for (u32 line = 0; line < copy.image_extent.height; ++line) { + const u32 host_offset_y = line * info.pitch; + const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch; + const u32 guest_offset = guest_offset_x + guest_offset_y; + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y, + row_length); + } +} + +void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, const BufferImageCopy& copy, + std::span<const u8> input) { + const Extent3D size = info.size; + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const u32 bytes_per_block 
= BytesPerBlock(info.format); + + const s32 level = copy.image_subresource.base_level; + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; + + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + UNIMPLEMENTED_IF(copy.image_offset.x != 0); + UNIMPLEMENTED_IF(copy.image_offset.y != 0); + UNIMPLEMENTED_IF(copy.image_offset.z != 0); + UNIMPLEMENTED_IF(copy.image_extent != level_size); + + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + + size_t host_offset = copy.buffer_offset; + + const u32 num_levels = info.resources.levels; + const std::array sizes = CalculateLevelSizes(level_info, num_levels); + size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0); + const size_t layer_stride = + AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size, + level_info.block, tile_size.height, info.tile_width_spacing); + const size_t subresource_size = sizes[level]; + + const auto dst_data = std::make_unique<u8[]>(subresource_size); + const std::span<u8> dst(dst_data.get(), subresource_size); + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span<const u8> src = input.subspan(host_offset); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + + host_offset += host_bytes_per_layer; + guest_offset += layer_stride; + } + ASSERT(host_offset - copy.buffer_offset == copy.buffer_size); +} + +} // Anonymous namespace + +u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + if (info.type == ImageType::Linear) 
{ + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + if (info.resources.layers > 1) { + ASSERT(info.layer_stride != 0); + return info.layer_stride * info.resources.layers; + } else { + return CalculateLayerSize(info); + } +} + +u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + if (info.num_samples > 1) { + // Multisample images can't be uploaded or downloaded to the host + return 0; + } + if (info.type == ImageType::Linear) { + return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); + } + const Extent2D tile_size = DefaultBlockSize(info.format); + return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); +} + +u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { + if (info.type == ImageType::Buffer) { + return info.size.width * BytesPerBlock(info.format); + } + static constexpr Extent2D TILE_SIZE{1, 1}; + return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; +} + +u32 CalculateLayerStride(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + const u32 layer_size = CalculateLayerSize(info); + const Extent3D size = info.size; + const Extent3D block = info.block; + const u32 tile_size_y = DefaultBlockHeight(info.format); + return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); +} + +u32 CalculateLayerSize(const ImageInfo& info) noexcept { + ASSERT(info.type != ImageType::Linear); + return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples, + info.tile_width_spacing, info.resources.levels); +} + +std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { + ASSERT(info.resources.levels <= MAX_MIP_LEVELS); + const LevelInfo level_info = MakeLevelInfo(info); + std::array<u32, 
MAX_MIP_LEVELS> offsets{}; + u32 offset = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + offsets[level] = offset; + offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector<u32> offsets; + offsets.reserve(NumSlices(info)); + + const LevelInfo level_info = MakeLevelInfo(info); + u32 mip_offset = 0; + for (s32 level = 0; level < info.resources.levels; ++level) { + const Extent3D tile_shift = TileShift(level_info, level); + const Extent3D tiles = LevelTiles(level_info, level); + const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT; + const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift; + const u32 z_mask = (1U << tile_shift.depth) - 1; + const u32 depth = AdjustMipSize(info.size.depth, level); + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z_low = slice & z_mask; + const u32 z_high = slice & ~z_mask; + offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size)); + } + mip_offset += CalculateLevelSize(level_info, level); + } + return offsets; +} + +std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { + ASSERT(info.type == ImageType::e3D); + std::vector<SubresourceBase> subresources; + subresources.reserve(NumSlices(info)); + for (s32 level = 0; level < info.resources.levels; ++level) { + const s32 depth = AdjustMipSize(info.size.depth, level); + for (s32 slice = 0; slice < depth; ++slice) { + subresources.emplace_back(SubresourceBase{ + .level = level, + .layer = slice, + }); + } + } + return subresources; +} + +u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, 
info.block, level); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); +} + +PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept { + return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, + config.a_type, config.srgb_conversion); +} + +ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { + switch (info.type) { + case ImageType::e2D: + return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; + case ImageType::e3D: + return ImageViewType::e2DArray; + case ImageType::Linear: + return ImageViewType::e2D; + default: + UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type)); + return ImageViewType{}; + } +} + +std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, + SubresourceBase base) { + ASSERT(dst.resources.levels >= src.resources.levels); + ASSERT(dst.num_samples == src.num_samples); + + const bool is_dst_3d = dst.type == ImageType::e3D; + if (is_dst_3d) { + ASSERT(src.type == ImageType::e3D); + ASSERT(src.resources.levels == 1); + } + + std::vector<ImageCopy> copies; + copies.reserve(src.resources.levels); + for (s32 level = 0; level < src.resources.levels; ++level) { + ImageCopy& copy = copies.emplace_back(); + copy.src_subresource = SubresourceLayers{ + .base_level = level, + .base_layer = 0, + .num_layers = src.resources.layers, + }; + copy.dst_subresource = SubresourceLayers{ + .base_level = base.level + level, + .base_layer = is_dst_3d ? 0 : base.layer, + .num_layers = is_dst_3d ? 1 : src.resources.layers, + }; + copy.src_offset = Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }; + copy.dst_offset = Offset3D{ + .x = 0, + .y = 0, + .z = is_dst_3d ? 
base.layer : 0, + }; + const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level); + copy.extent = AdjustSamplesSize(mip_size, dst.num_samples); + if (is_dst_3d) { + copy.extent.depth = src.size.depth; + } + } + return copies; +} + +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + if (config.Address() == 0) { + return false; + } + if (config.Address() > (u64(1) << 48)) { + return false; + } + return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); +} + +std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageInfo& info, std::span<u8> output) { + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + const u32 bpp_log2 = BytesPerBlockLog2(info.format); + const Extent3D size = info.size; + + if (info.type == ImageType::Linear) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); + + ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); + return {{ + .buffer_offset = 0, + .buffer_size = guest_size_bytes, + .buffer_row_length = info.pitch >> bpp_log2, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = size, + }}; + } + const auto input_data = std::make_unique<u8[]>(guest_size_bytes); + gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); + const std::span<const u8> input(input_data.get(), guest_size_bytes); + + const LevelInfo level_info = MakeLevelInfo(info); + const s32 num_layers = info.resources.layers; + const s32 num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + const std::array level_sizes = CalculateLevelSizes(level_info, num_levels); + const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing); + const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0); + const 
u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height, + info.tile_width_spacing); + size_t guest_offset = 0; + u32 host_offset = 0; + std::vector<BufferImageCopy> copies(num_levels); + + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers, + .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width), + .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height), + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2); + size_t guest_layer_offset = 0; + + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + const std::span<u8> dst = output.subspan(host_offset); + const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset); + UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth, stride_alignment); + guest_layer_offset += layer_stride; + host_offset += host_bytes_per_layer; + } + guest_offset += level_sizes[level]; + } + return copies; +} + +BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span<u8> output) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); + return BufferCopy{ + .src_offset = 0, + .dst_offset = 0, + .size = 
image.guest_size_bytes, + }; +} + +void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, + std::span<BufferImageCopy> copies) { + u32 output_offset = 0; + + const Extent2D tile_size = DefaultBlockSize(info.format); + for (BufferImageCopy& copy : copies) { + const u32 level = copy.image_subresource.base_level; + const Extent3D mip_size = AdjustMipSize(info.size, level); + ASSERT(copy.image_offset == Offset3D{}); + ASSERT(copy.image_subresource.base_layer == 0); + ASSERT(copy.image_extent == mip_size); + ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); + ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); + + if (IsPixelFormatASTC(info.format)) { + ASSERT(copy.image_extent.depth == 1); + Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), + copy.image_extent.width, copy.image_extent.height, + copy.image_subresource.num_layers, tile_size.width, + tile_size.height, output.subspan(output_offset)); + } else { + DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, + output.subspan(output_offset)); + } + copy.buffer_offset = output_offset; + copy.buffer_row_length = mip_size.width; + copy.buffer_image_height = mip_size.height; + + output_offset += copy.image_extent.width * copy.image_extent.height * + copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; + } +} + +std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { + const Extent3D size = info.size; + const u32 bytes_per_block = BytesPerBlock(info.format); + if (info.type == ImageType::Linear) { + ASSERT(info.pitch % bytes_per_block == 0); + return {{ + .buffer_offset = 0, + .buffer_size = static_cast<size_t>(info.pitch) * size.height, + .buffer_row_length = info.pitch / bytes_per_block, + .buffer_image_height = size.height, + .image_subresource = + { + .base_level = 0, + .base_layer = 0, + .num_layers = 1, + }, + .image_offset = {0, 0, 0}, + .image_extent = 
size, + }}; + } + UNIMPLEMENTED_IF(info.tile_width_spacing > 0); + + const s32 num_layers = info.resources.layers; + const s32 num_levels = info.resources.levels; + const Extent2D tile_size = DefaultBlockSize(info.format); + + u32 host_offset = 0; + + std::vector<BufferImageCopy> copies(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); + const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers; + copies[level] = BufferImageCopy{ + .buffer_offset = host_offset, + .buffer_size = host_bytes_per_level, + .buffer_row_length = level_size.width, + .buffer_image_height = level_size.height, + .image_subresource = + { + .base_level = level, + .base_layer = 0, + .num_layers = info.resources.layers, + }, + .image_offset = {0, 0, 0}, + .image_extent = level_size, + }; + host_offset += host_bytes_per_level; + } + return copies; +} + +Extent3D MipSize(Extent3D size, u32 level) { + return AdjustMipSize(size, level); +} + +Extent3D MipBlockSize(const ImageInfo& info, u32 level) { + const LevelInfo level_info = MakeLevelInfo(info); + const Extent2D tile_size = DefaultBlockSize(info.format); + const Extent3D level_size = AdjustMipSize(info.size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + return AdjustMipBlockSize(num_tiles, level_info.block, level); +} + +std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { + const Extent2D tile_size = DefaultBlockSize(info.format); + if (info.type == ImageType::Linear) { + return std::vector{SwizzleParameters{ + .num_tiles = AdjustTileSize(info.size, tile_size), + .block = {}, + .buffer_offset = 0, + .level = 0, + }}; + } + const LevelInfo level_info = MakeLevelInfo(info); + const Extent3D size = info.size; + const s32 num_levels = info.resources.levels; + + u32 guest_offset = 0; + std::vector<SwizzleParameters> 
params(num_levels); + for (s32 level = 0; level < num_levels; ++level) { + const Extent3D level_size = AdjustMipSize(size, level); + const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); + const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level); + params[level] = SwizzleParameters{ + .num_tiles = num_tiles, + .block = block, + .buffer_offset = guest_offset, + .level = level, + }; + guest_offset += CalculateLevelSize(level_info, level); + } + return params; +} + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span<const BufferImageCopy> copies, std::span<const u8> memory) { + const bool is_pitch_linear = info.type == ImageType::Linear; + for (const BufferImageCopy& copy : copies) { + if (is_pitch_linear) { + SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } else { + SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); + } + } +} + +bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, + u32 rhs_level, bool strict_size) noexcept { + ASSERT(lhs.type != ImageType::Linear); + ASSERT(rhs.type != ImageType::Linear); + if (strict_size) { + const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); + const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } else { + const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); + const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); + return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; + } +} + +bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { + ASSERT(lhs.type == ImageType::Linear); + ASSERT(rhs.type == ImageType::Linear); + if (strict_size) { + return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; + } else { + const Extent2D lhs_size = 
PitchLinearAlignedSize(lhs); + const Extent2D rhs_size = PitchLinearAlignedSize(rhs); + return lhs_size == rhs_size; + } +} + +std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr, + VAddr cpu_addr, const ImageBase& overlap, + bool strict_size) { + ASSERT(new_info.type != ImageType::Linear); + ASSERT(overlap.info.type != ImageType::Linear); + if (!IsLayerStrideCompatible(new_info, overlap.info)) { + return std::nullopt; + } + if (!IsViewCompatible(overlap.info.format, new_info.format)) { + return std::nullopt; + } + if (gpu_addr == overlap.gpu_addr) { + const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size); + if (!solution) { + return std::nullopt; + } + return OverlapResult{ + .gpu_addr = gpu_addr, + .cpu_addr = cpu_addr, + .resources = *solution, + }; + } + if (overlap.gpu_addr > gpu_addr) { + return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); + } + // if overlap.gpu_addr < gpu_addr + return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size); +} + +bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { + // If either of the layer strides is zero, we can assume they are compatible + // These images generally come from rendertargets + if (lhs.layer_stride == 0) { + return true; + } + if (rhs.layer_stride == 0) { + return true; + } + // It's definitely compatible if the layer stride matches + if (lhs.layer_stride == rhs.layer_stride) { + return true; + } + // Although we also have to compare for cases where it can be unaligned + // This can happen if the image doesn't have layers, so the stride is not aligned + if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { + return true; + } + return false; +} + +std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options) { + const std::optional<SubresourceBase> 
base = image.TryFindBase(candidate_addr); + if (!base) { + return std::nullopt; + } + const ImageInfo& existing = image.info; + if (False(options & RelaxedOptions::Format)) { + if (!IsViewCompatible(existing.format, candidate.format)) { + return std::nullopt; + } + } + if (!IsLayerStrideCompatible(existing, candidate)) { + return std::nullopt; + } + if (existing.type != candidate.type) { + return std::nullopt; + } + if (False(options & RelaxedOptions::Samples)) { + if (existing.num_samples != candidate.num_samples) { + return std::nullopt; + } + } + if (existing.resources.levels < candidate.resources.levels + base->level) { + return std::nullopt; + } + if (existing.type == ImageType::e3D) { + const u32 mip_depth = std::max(1U, existing.size.depth << base->level); + if (mip_depth < candidate.size.depth + base->layer) { + return std::nullopt; + } + } else { + if (existing.resources.layers < candidate.resources.layers + base->layer) { + return std::nullopt; + } + } + const bool strict_size = False(options & RelaxedOptions::Size); + if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { + return std::nullopt; + } + // TODO: compare block sizes + return base; +} + +bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, + RelaxedOptions options) { + return FindSubresource(candidate, image, candidate_addr, options).has_value(); +} + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + dst_info.format = dst->info.format; + } + if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + 
src_info.format = src->info.format; + } +} + +u32 MapSizeBytes(const ImageBase& image) { + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + return image.guest_size_bytes; + } else if (True(image.flags & ImageFlagBits::Converted)) { + return image.converted_size_bytes; + } else { + return image.unswizzled_size_bytes; + } +} + +using P = PixelFormat; + +static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); + +static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00); +static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) == + 0x50d200); + +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600); +static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800); + +constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, + u32 tile_width_spacing, u32 level) { + const Extent3D size{width, height, 1}; + const Extent3D block{0, block_height, 0}; + const u32 offset 
= CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); + return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); +} + +static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800); +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000); +static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000); + +static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000, + "Tile width spacing is not working"); +static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000, + "Compressed tile width spacing is not working"); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..dbbbd33cd --- /dev/null +++ b/src/video_core/texture_cache/util.h @@ -0,0 +1,107 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <optional> +#include <span> + +#include "common/common_types.h" + +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::TICEntry; + +struct OverlapResult { + GPUVAddr gpu_addr; + VAddr cpu_addr; + SubresourceExtent resources; +}; + +[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept; + +[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept; + +[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets( + const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); + +[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); + +[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); + +[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC( + const Tegra::Texture::TICEntry& config) noexcept; + +[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; + +[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base); + +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); + +[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, const ImageInfo& info, + std::span<u8> output); + +[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 
+ const ImageBase& image, std::span<u8> output); + +void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, + std::span<BufferImageCopy> copies); + +[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); + +[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); + +[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); + +[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); + +void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span<const BufferImageCopy> copies, std::span<const u8> memory); + +[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, + const ImageInfo& overlap_info, u32 new_level, + u32 overlap_level, bool strict_size) noexcept; + +[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, + bool strict_size) noexcept; + +[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, + GPUVAddr gpu_addr, VAddr cpu_addr, + const ImageBase& overlap, + bool strict_size); + +[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs); + +[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, + const ImageBase& image, + GPUVAddr candidate_addr, + RelaxedOptions options); + +[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, + GPUVAddr candidate_addr, RelaxedOptions options); + +void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, + const ImageBase* src); + +[[nodiscard]] u32 MapSizeBytes(const ImageBase& image); + +} // namespace VideoCommon diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -18,6 +18,7 @@ #include <algorithm> #include <cassert> #include 
<cstring> +#include <span> #include <vector> #include <boost/container/static_vector.hpp> @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, +static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { // Don't actually care about the void extent, just read the bits... for (s32 i = 0; i < 4; ++i) { @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block } } -static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { +static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { for (u32 j = 0; j < blockHeight; j++) { for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = 0xFFFF00FF; @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } -static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, - u32* outBuf) { - InputBitStream strm(inBuf); +static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, + const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { + InputBitStream strm(inBuf.data()); TexelWeightParams weightParams = DecodeBlockInfo(strm); // Was there an error? @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 } // Read the texel weight data.. 
- u8 texelWeightData[16]; - memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); + std::array<u8, 16> texelWeightData; + std::ranges::copy(inBuf, texelWeightData.begin()); // Reverse everything for (u32 i = 0; i < 8; i++) { @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 // Make sure that higher non-texel bits are set to zero const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] = - texelWeightData[clearByteStart - 1] & - static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); - memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); + if (clearByteStart > 0) { + texelWeightData[clearByteStart - 1] &= + static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + } + std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); IntegerEncodedVector texelWeightValues; - InputBitStream weightStream(texelWeightData); + InputBitStream weightStream(texelWeightData.data()); DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, weightParams.GetNumWeightValues()); @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 namespace Tegra::Texture::ASTC { -std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, - u32 block_height) { - u32 blockIdx = 0; +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { + u32 block_index = 0; std::size_t depth_offset = 0; - std::vector<u8> outData(height * width * depth * 4); - for (u32 k = 0; k < depth; k++) { - for (u32 j = 0; j < height; j += block_height) { - for (u32 i = 0; i < width; i += block_width) { - - const u8* blockPtr = data + blockIdx * 16; + for (u32 z = 0; z < depth; z++) { + for (u32 y = 0; y < height; y += block_height) { 
+ for (u32 x = 0; x < width; x += block_width) { + const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; // Blocks can be at most 12x12 - u32 uncompData[144]; + std::array<u32, 12 * 12> uncompData; ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - u32 decompWidth = std::min(block_width, width - i); - u32 decompHeight = std::min(block_height, height - j); + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); - u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; + const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); for (u32 jj = 0; jj < decompHeight; jj++) { - memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); + std::memcpy(outRow.data() + jj * width * 4, + uncompData.data() + jj * block_width, decompWidth * 4); } - - blockIdx++; + ++block_index; } } depth_offset += height * width * 4; } - - return outData; } } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -5,11 +5,10 @@ #pragma once #include <cstdint> -#include <vector> namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, - uint32_t depth, uint32_t block_width, uint32_t block_height); +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index 962921483..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the 
license.txt file included. - -#include <algorithm> -#include <cstring> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/surface.h" -#include "video_core/textures/astc.h" -#include "video_core/textures/convert.h" - -namespace Tegra::Texture { - -using VideoCore::Surface::PixelFormat; - -template <bool reverse> -void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - union S8Z24 { - BitField<0, 24, u32> z24; - BitField<24, 8, u32> s8; - }; - static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - - union Z24S8 { - BitField<0, 8, u32> s8; - BitField<8, 24, u32> z24; - }; - static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - - S8Z24 s8z24_pixel{}; - Z24S8 z24s8_pixel{}; - constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - if constexpr (reverse) { - std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); - s8z24_pixel.s8.Assign(z24s8_pixel.s8); - s8z24_pixel.z24.Assign(z24s8_pixel.z24); - std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); - } else { - std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); - z24s8_pixel.s8.Assign(s8z24_pixel.s8); - z24s8_pixel.z24.Assign(s8z24_pixel.z24); - std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); - } - } - } -} - -static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8<false>(data, width, height); -} - -static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { - SwapS8Z24ToZ24S8<true>(data, width, height); -} - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - // Convert ASTC pixel 
formats to RGBA8, as most desktop GPUs do not support ASTC. - u32 block_width{}; - u32 block_height{}; - std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( - in_data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); - } -} - -void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { - if (convert_astc && IsPixelFormatASTC(pixel_format)) { - LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", - static_cast<u32>(pixel_format)); - UNREACHABLE(); - - } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { - Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); - } -} - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace VideoCore::Surface { -enum class PixelFormat; -} - -namespace Tegra::Texture { - -void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, - u32 width, u32 height, u32 depth, bool convert_astc, - bool convert_s8z24); - -void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); - -} // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -2,204 +2,111 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cmath> #include <cstring> +#include <span> +#include <utility> + #include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" +#include "common/div_ceil.h" #include "video_core/gpu.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" namespace Tegra::Texture { -namespace { +namespace { /** - * This table represents the internal swizzle of a gob, - * in format 16 bytes x 2 sector packing. + * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. * Calculates the offset of an (x, y) position within a swizzled texture. * Taken from the Tegra X1 Technical Reference Manual. 
pages 1187-1188 */ -template <std::size_t N, std::size_t M, u32 Align> -struct alignas(64) SwizzleTable { - static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); - constexpr SwizzleTable() { - for (u32 y = 0; y < N; ++y) { - for (u32 x = 0; x < M; ++x) { - const u32 x2 = x * Align; - values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + - ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); - } +constexpr SwizzleTable MakeSwizzleTableConst() { + SwizzleTable table{}; + for (u32 y = 0; y < table.size(); ++y) { + for (u32 x = 0; x < table[0].size(); ++x) { + table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } } - const std::array<u16, M>& operator[](std::size_t index) const { - return values[index]; - } - std::array<std::array<u16, M>, N> values{}; -}; + return table; +} -constexpr u32 FAST_SWIZZLE_ALIGN = 16; +constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); -constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); -constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); +template <bool TO_LINEAR> +void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { + // The origin of the transformation can be configured here, leave it as zero as the current API + // doesn't expose it. + static constexpr u32 origin_x = 0; + static constexpr u32 origin_y = 0; + static constexpr u32 origin_z = 0; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array<u8*, 2> data_ptrs; - u32 z_address = tile_offset; - - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 x = x_start; x < x_end; x++) { - const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; - const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; - data_ptrs[unswizzle] = swizzled_data + swizzle_offset; - data_ptrs[!unswizzle] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } -} + // We can configure here a custom pitch + // As it's not exposed 'width * bpp' will be the expected pitch. + const u32 pitch = width * bytes_per_pixel; + const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; -/** - * This function manages ALL the GOBs(Group of Bytes) Inside a single block. - * Instead of going gob by gob, we map the coordinates inside a block and manage from - * those. Block_Width is assumed to be 1. 
- */ -void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, - const u32 y_end, const u32 z_end, const u32 tile_offset, - const u32 xy_block_size, const u32 layer_z, const u32 stride_x, - const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { - std::array<u8*, 2> data_ptrs; - u32 z_address = tile_offset; - const u32 x_startb = x_start * bytes_per_pixel; - const u32 x_endb = x_end * bytes_per_pixel; - - for (u32 z = z_start; z < z_end; z++) { - u32 y_address = z_address; - u32 pixel_base = layer_z * z + y_start * stride_x; - for (u32 y = y_start; y < y_end; y++) { - const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; - for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { - const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; - const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; - const u32 pixel_index{out_x + pixel_base}; - data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; - data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); - } - pixel_base += stride_x; - if ((y + 1) % GOB_SIZE_Y == 0) - y_address += GOB_SIZE; - } - z_address += xy_block_size; - } -} + const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + const u32 slice_size = + Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; -/** - * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. - * The body of this function takes care of splitting the swizzled texture into blocks, - * and managing the extents of it. Once all the parameters of a single block are obtained, - * the function calls 'ProcessBlock' to process that particular Block. 
- * - * Documentation for the memory layout and decoding can be found at: - * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces - */ -template <bool fast> -void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, - const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, - const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, - const u32 width_spacing) { - auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; - const u32 stride_x = width * out_bytes_per_pixel; - const u32 layer_z = height * stride_x; - const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; - constexpr u32 gob_elements_y = GOB_SIZE_Y; - constexpr u32 gob_elements_z = GOB_SIZE_Z; - const u32 block_x_elements = gob_elements_x; - const u32 block_y_elements = gob_elements_y * block_height; - const u32 block_z_elements = gob_elements_z * block_depth; - const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); - const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); - const u32 blocks_on_y = div_ceil(height, block_y_elements); - const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 xy_block_size = GOB_SIZE * block_height; - const u32 block_size = xy_block_size * block_depth; - u32 tile_offset = 0; - for (u32 zb = 0; zb < blocks_on_z; zb++) { - const u32 z_start = zb * block_z_elements; - const u32 z_end = std::min(depth, z_start + block_z_elements); - for (u32 yb = 0; yb < blocks_on_y; yb++) { - const u32 y_start = yb * block_y_elements; - const u32 y_end = std::min(height, y_start + block_y_elements); - for (u32 xb = 0; xb < blocks_on_x; xb++) { - const u32 x_start = xb * block_x_elements; - const u32 x_end = std::min(width, x_start + block_x_elements); - if constexpr (fast) { - FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, 
xy_block_size, - layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } else { - PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, - z_start, x_end, y_end, z_end, tile_offset, xy_block_size, - layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); - } - tile_offset += block_size; + const u32 block_height_mask = (1U << block_height) - 1; + const u32 block_depth_mask = (1U << block_depth) - 1; + const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; + + for (u32 slice = 0; slice < depth; ++slice) { + const u32 z = slice + origin_z; + const u32 offset_z = (z >> block_depth) * slice_size + + ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); + for (u32 line = 0; line < height; ++line) { + const u32 y = line + origin_y; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; + + const u32 block_y = y >> GOB_SIZE_Y_SHIFT; + const u32 offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + + for (u32 column = 0; column < width; ++column) { + const u32 x = (column + origin_x) * bytes_per_pixel; + const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 base_swizzled_offset = offset_z + offset_y + offset_x; + const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; + + const u32 unswizzled_offset = + slice * pitch * height + line * pitch + column * bytes_per_pixel; + + u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; + const u8* const src = &input[TO_LINEAR ? 
unswizzled_offset : swizzled_offset]; + std::memcpy(dst, src, bytes_per_pixel); } } } } - } // Anonymous namespace -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { - const u32 block_height_size{1U << block_height}; - const u32 block_depth_size{1U << block_depth}; - if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { - SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } else { - SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height_size, - block_depth_size, width_spacing); - } +SwizzleTable MakeSwizzleTable() { + return SWIZZLE_TABLE; } -void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - CopySwizzledData((width + tile_size_x - 1) / tile_size_x, - (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, - bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, - width_spacing); +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } -std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, u32 block_height, - u32 block_depth, u32 width_spacing) { - std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); - 
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, - width, height, depth, block_height, block_depth, width_spacing); - return unswizzled_data; +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 const u32 gob_address_y = (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; + const u32 x_shift = GOB_SIZE_SHIFT + block_height; for (u32 line = 0; line < line_count; ++line) { const u32 src_y = line + origin_y; - const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; const u32 src_offset_y = (block_y >> block_height) * block_size + @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; for (u32 line = 0; line < line_count; ++line) { - const 
auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; const u32 block_y = line / GOB_SIZE_Y; const u32 dst_offset_y = (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 const std::size_t gob_address_y = (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; + const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -4,7 +4,8 @@ #pragma once -#include <vector> +#include <span> + #include "common/common_types.h" #include "video_core/textures/texture.h" @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; constexpr u32 GOB_SIZE_Z = 1; constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -constexpr std::size_t GOB_SIZE_X_SHIFT = 6; -constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; -constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; -constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; - -/// Unswizzles a swizzled texture without changing its format. -void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, - u32 bytes_per_pixel, u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); - -/// Unswizzles a swizzled texture without changing its format. 
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, - u32 block_height = TICEntry::DefaultBlockHeight, - u32 block_depth = TICEntry::DefaultBlockHeight, - u32 width_spacing = 0); - -/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. -void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, - u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); +constexpr u32 GOB_SIZE_X_SHIFT = 6; +constexpr u32 GOB_SIZE_Y_SHIFT = 3; +constexpr u32 GOB_SIZE_Z_SHIFT = 0; +constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; + +/// Returns a z-order swizzle table +SwizzleTable MakeSwizzleTable(); + +/// Unswizzles a block linear texture into linear memory. +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); + +/// Swizzles linear memory into a block linear texture. +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment = 1); /// This function calculates the correct size of a texture depending if it's tiled or not. 
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -5,9 +5,13 @@ #include <algorithm> #include <array> +#include "common/cityhash.h" #include "core/settings.h" #include "video_core/textures/texture.h" +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; + namespace Tegra::Texture { namespace { @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { } // Anonymous namespace -std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { +std::array<float, 4> TSCEntry::BorderColor() const noexcept { if (!srgb_conversion) { return border_color; } @@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; } -float TSCEntry::GetMaxAnisotropy() const noexcept { +float TSCEntry::MaxAnisotropy() const noexcept { return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); } } // namespace Tegra::Texture + +size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept { + return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic); +} + +size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept { + return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc); +} diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 0574fef12..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { BC4 = 0x27, BC5 = 0x28, S8D24 = 0x29, - X8Z24 = 0x2a, + X8D24 = 0x2a, D24S8 = 0x2b, - X4V4Z24__COV4R4V = 0x2c, - X4V4Z24__COV8R8V = 0x2d, - V8Z24__COV4R12V = 0x2e, + X4V4D24__COV4R4V = 0x2c, + X4V4D24__COV8R8V = 0x2d, + 
V8D24__COV4R12V = 0x2e, D32 = 0x2f, D32S8 = 0x30, - X8Z24_X20V4S8__COV4R4V = 0x31, - X8Z24_X20V4S8__COV8R8V = 0x32, - ZF32_X20V4X8__COV4R4V = 0x33, - ZF32_X20V4X8__COV8R8V = 0x34, - ZF32_X20V4S8__COV4R4V = 0x35, - ZF32_X20V4S8__COV8R8V = 0x36, - X8Z24_X16V8S8__COV4R12V = 0x37, - ZF32_X16V8X8__COV4R12V = 0x38, - ZF32_X16V8S8__COV4R12V = 0x39, + X8D24_X20V4S8__COV4R4V = 0x31, + X8D24_X20V4S8__COV8R8V = 0x32, + D32_X20V4X8__COV4R4V = 0x33, + D32_X20V4X8__COV8R8V = 0x34, + D32_X20V4S8__COV4R4V = 0x35, + D32_X20V4S8__COV8R8V = 0x36, + X8D24_X16V8S8__COV4R12V = 0x37, + D32_X16V8X8__COV4R12V = 0x38, + D32_X16V8S8__COV4R12V = 0x39, D16 = 0x3a, - V8Z24__COV8R24V = 0x3b, - X8Z24_X16V8S8__COV8R24V = 0x3c, - ZF32_X16V8X8__COV8R24V = 0x3d, - ZF32_X16V8S8__COV8R24V = 0x3e, + V8D24__COV8R24V = 0x3b, + X8D24_X16V8S8__COV8R24V = 0x3c, + D32_X16V8X8__COV8R24V = 0x3d, + D32_X16V8S8__COV8R24V = 0x3e, ASTC_2D_4X4 = 0x40, ASTC_2D_5X5 = 0x41, ASTC_2D_6X6 = 0x42, @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { }; union TextureHandle { - TextureHandle(u32 raw) : raw{raw} {} + /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} u32 raw; BitField<0, 20, u32> tic_id; @@ -155,124 +155,124 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { - static constexpr u32 DefaultBlockHeight = 16; - static constexpr u32 DefaultBlockDepth = 1; - - union { - u32 raw; - BitField<0, 7, TextureFormat> format; - BitField<7, 3, ComponentType> r_type; - BitField<10, 3, ComponentType> g_type; - BitField<13, 3, ComponentType> b_type; - BitField<16, 3, ComponentType> a_type; - - BitField<19, 3, SwizzleSource> x_source; - BitField<22, 3, SwizzleSource> y_source; - BitField<25, 3, SwizzleSource> z_source; - BitField<28, 3, SwizzleSource> w_source; - }; - u32 address_low; union { - BitField<0, 16, u32> address_high; - BitField<21, 3, TICHeaderVersion> header_version; - }; - union { - BitField<0, 3, u32> block_width; - BitField<3, 3, u32> 
block_height; - BitField<6, 3, u32> block_depth; + struct { + union { + BitField<0, 7, TextureFormat> format; + BitField<7, 3, ComponentType> r_type; + BitField<10, 3, ComponentType> g_type; + BitField<13, 3, ComponentType> b_type; + BitField<16, 3, ComponentType> a_type; + + BitField<19, 3, SwizzleSource> x_source; + BitField<22, 3, SwizzleSource> y_source; + BitField<25, 3, SwizzleSource> z_source; + BitField<28, 3, SwizzleSource> w_source; + }; + u32 address_low; + union { + BitField<0, 16, u32> address_high; + BitField<16, 5, u32> layer_base_3_7; + BitField<21, 3, TICHeaderVersion> header_version; + BitField<24, 1, u32> load_store_hint; + BitField<25, 4, u32> view_coherency_hash; + BitField<29, 3, u32> layer_base_8_10; + }; + union { + BitField<0, 3, u32> block_width; + BitField<3, 3, u32> block_height; + BitField<6, 3, u32> block_depth; - BitField<10, 3, u32> tile_width_spacing; + BitField<10, 3, u32> tile_width_spacing; - // High 16 bits of the pitch value - BitField<0, 16, u32> pitch_high; - BitField<26, 1, u32> use_header_opt_control; - BitField<27, 1, u32> depth_texture; - BitField<28, 4, u32> max_mip_level; + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + BitField<26, 1, u32> use_header_opt_control; + BitField<27, 1, u32> depth_texture; + BitField<28, 4, u32> max_mip_level; - BitField<0, 16, u32> buffer_high_width_minus_one; - }; - union { - BitField<0, 16, u32> width_minus_1; - BitField<22, 1, u32> srgb_conversion; - BitField<23, 4, TextureType> texture_type; - BitField<29, 3, u32> border_size; + BitField<0, 16, u32> buffer_high_width_minus_one; + }; + union { + BitField<0, 16, u32> width_minus_one; + BitField<16, 3, u32> layer_base_0_2; + BitField<22, 1, u32> srgb_conversion; + BitField<23, 4, TextureType> texture_type; + BitField<29, 3, u32> border_size; - BitField<0, 16, u32> buffer_low_width_minus_one; - }; - union { - BitField<0, 16, u32> height_minus_1; - BitField<16, 14, u32> depth_minus_1; - }; - union { - BitField<6, 13, 
u32> mip_lod_bias; - BitField<27, 3, u32> max_anisotropy; + BitField<0, 16, u32> buffer_low_width_minus_one; + }; + union { + BitField<0, 16, u32> height_minus_1; + BitField<16, 14, u32> depth_minus_1; + BitField<30, 1, u32> is_sparse; + BitField<31, 1, u32> normalized_coords; + }; + union { + BitField<6, 13, u32> mip_lod_bias; + BitField<27, 3, u32> max_anisotropy; + }; + union { + BitField<0, 4, u32> res_min_mip_level; + BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; + BitField<12, 12, u32> min_lod_clamp; + }; + }; + std::array<u64, 4> raw; }; - union { - BitField<0, 4, u32> res_min_mip_level; - BitField<4, 4, u32> res_max_mip_level; - BitField<8, 4, MsaaMode> msaa_mode; - BitField<12, 12, u32> min_lod_clamp; - }; + constexpr bool operator==(const TICEntry& rhs) const noexcept { + return raw == rhs.raw; + } - GPUVAddr Address() const { + constexpr bool operator!=(const TICEntry& rhs) const noexcept { + return raw != rhs.raw; + } + + constexpr GPUVAddr Address() const { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); } - u32 Pitch() const { + constexpr u32 Pitch() const { ASSERT(header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey); // The pitch value is 21 bits, and is 32B aligned. 
return pitch_high << 5; } - u32 Width() const { + constexpr u32 Width() const { if (header_version != TICHeaderVersion::OneDBuffer) { - return width_minus_1 + 1; + return width_minus_one + 1; } - return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; + return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; } - u32 Height() const { + constexpr u32 Height() const { return height_minus_1 + 1; } - u32 Depth() const { + constexpr u32 Depth() const { return depth_minus_1 + 1; } - u32 BlockWidth() const { - ASSERT(IsTiled()); - return block_width; - } - - u32 BlockHeight() const { - ASSERT(IsTiled()); - return block_height; - } - - u32 BlockDepth() const { - ASSERT(IsTiled()); - return block_depth; + constexpr u32 BaseLayer() const { + return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; } - bool IsTiled() const { + constexpr bool IsBlockLinear() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; } - bool IsLineal() const { + constexpr bool IsPitchLinear() const { return header_version == TICHeaderVersion::Pitch || header_version == TICHeaderVersion::PitchColorKey; } - bool IsBuffer() const { + constexpr bool IsBuffer() const { return header_version == TICHeaderVersion::OneDBuffer; } - - bool IsSrgbConversionEnabled() const { - return srgb_conversion != 0; - } }; static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { Linear = 3, }; +enum class SamplerReduction : u32 { + WeightedAverage = 0, + Min = 1, + Max = 2, +}; + enum class Anisotropy { Default, Filter2x, @@ -333,8 +339,12 @@ struct TSCEntry { BitField<0, 2, TextureFilter> mag_filter; BitField<4, 2, TextureFilter> min_filter; BitField<6, 2, TextureMipmapFilter> mipmap_filter; + BitField<8, 1, u32> cubemap_anisotropy; BitField<9, 1, u32> cubemap_interface_filtering; + BitField<10, 2, SamplerReduction> 
reduction_filter; BitField<12, 13, u32> mip_lod_bias; + BitField<25, 1, u32> float_coord_normalization; + BitField<26, 5, u32> trilin_opt; }; union { BitField<0, 12, u32> min_lod_clamp; @@ -347,32 +357,45 @@ struct TSCEntry { }; std::array<f32, 4> border_color; }; - std::array<u8, 0x20> raw; + std::array<u64, 4> raw; }; - std::array<float, 4> GetBorderColor() const noexcept; + constexpr bool operator==(const TSCEntry& rhs) const noexcept { + return raw == rhs.raw; + } + + constexpr bool operator!=(const TSCEntry& rhs) const noexcept { + return raw != rhs.raw; + } + + std::array<float, 4> BorderColor() const noexcept; - float GetMaxAnisotropy() const noexcept; + float MaxAnisotropy() const noexcept; - float GetMinLod() const { + float MinLod() const { return static_cast<float>(min_lod_clamp) / 256.0f; } - float GetMaxLod() const { + float MaxLod() const { return static_cast<float>(max_lod_clamp) / 256.0f; } - float GetLodBias() const { + float LodBias() const { // Sign extend the 13-bit value. 
- constexpr u32 mask = 1U << (13 - 1); + static constexpr u32 mask = 1U << (13 - 1); return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); -struct FullTextureInfo { - TICEntry tic; - TSCEntry tsc; +} // namespace Tegra::Texture + +template <> +struct std::hash<Tegra::Texture::TICEntry> { + size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; }; -} // namespace Tegra::Texture +template <> +struct std::hash<Tegra::Texture::TSCEntry> { + size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; +}; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index dd5cee4a1..53444e945 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -7,13 +7,9 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" -#include "video_core/gpu_asynch.h" -#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#ifdef HAS_VULKAN #include "video_core/renderer_vulkan/renderer_vulkan.h" -#endif #include "video_core/video_core.h" namespace { @@ -28,11 +24,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( case Settings::RendererBackend::OpenGL: return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, gpu, std::move(context)); -#ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, gpu, std::move(context)); -#endif default: return nullptr; } @@ -43,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - std::unique_ptr<Tegra::GPU> gpu; const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); - if 
(Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); - } else { - gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec); - } + std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( + system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); auto context = emu_window.CreateSharedContext(); const auto scope = context->Acquire(); diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index b16b54032..e1bab2112 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -141,6 +141,8 @@ add_executable(yuzu util/limitable_input_dialog.h util/sequence_dialog/sequence_dialog.cpp util/sequence_dialog/sequence_dialog.h + util/url_request_interceptor.cpp + util/url_request_interceptor.h util/util.cpp util/util.h compatdb.cpp @@ -217,7 +219,8 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::Widgets) target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) -if (ENABLE_VULKAN AND NOT WIN32) +target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) +if (NOT WIN32) target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) endif() @@ -278,8 +281,3 @@ endif() if (NOT APPLE) target_compile_definitions(yuzu PRIVATE HAS_OPENGL) endif() - -if (ENABLE_VULKAN) - target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) - target_compile_definitions(yuzu PRIVATE HAS_VULKAN) -endif() diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp index 6944478f3..a15e8ca2a 100644 --- a/src/yuzu/applets/controller.cpp +++ b/src/yuzu/applets/controller.cpp @@ -660,8 +660,8 @@ QtControllerSelector::QtControllerSelector(GMainWindow& parent) { QtControllerSelector::~QtControllerSelector() = default; void QtControllerSelector::ReconfigureControllers( - std::function<void()> callback, 
const Core::Frontend::ControllerParameters& parameters) const { - this->callback = std::move(callback); + std::function<void()> callback_, const Core::Frontend::ControllerParameters& parameters) const { + callback = std::move(callback_); emit MainWindowReconfigureControllers(parameters); } diff --git a/src/yuzu/applets/controller.h b/src/yuzu/applets/controller.h index 7a421d856..3518eed56 100644 --- a/src/yuzu/applets/controller.h +++ b/src/yuzu/applets/controller.h @@ -147,7 +147,7 @@ public: ~QtControllerSelector() override; void ReconfigureControllers( - std::function<void()> callback, + std::function<void()> callback_, const Core::Frontend::ControllerParameters& parameters) const override; signals: diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp index 08ed57355..53a993cf6 100644 --- a/src/yuzu/applets/error.cpp +++ b/src/yuzu/applets/error.cpp @@ -17,7 +17,7 @@ QtErrorDisplay::QtErrorDisplay(GMainWindow& parent) { QtErrorDisplay::~QtErrorDisplay() = default; void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { - this->callback = std::move(finished); + callback = std::move(finished); emit MainWindowDisplayError( tr("An error has occured.\nPlease try again or contact the developer of the " "software.\n\nError Code: %1-%2 (0x%3)") @@ -28,7 +28,7 @@ void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time, std::function<void()> finished) const { - this->callback = std::move(finished); + callback = std::move(finished); const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); emit MainWindowDisplayError( @@ -44,7 +44,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text, std::string fullscreen_text, std::function<void()> finished) const { - this->callback = 
std::move(finished); + callback = std::move(finished); emit MainWindowDisplayError( tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index c9a2f8601..4bf2bfd40 100644 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp @@ -150,8 +150,8 @@ QtProfileSelector::QtProfileSelector(GMainWindow& parent) { QtProfileSelector::~QtProfileSelector() = default; void QtProfileSelector::SelectProfile( - std::function<void(std::optional<Common::UUID>)> callback) const { - this->callback = std::move(callback); + std::function<void(std::optional<Common::UUID>)> callback_) const { + callback = std::move(callback_); emit MainWindowSelectProfile(); } diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/profile_select.h index 29c33cca0..4e9037488 100644 --- a/src/yuzu/applets/profile_select.h +++ b/src/yuzu/applets/profile_select.h @@ -60,7 +60,7 @@ public: explicit QtProfileSelector(GMainWindow& parent); ~QtProfileSelector() override; - void SelectProfile(std::function<void(std::optional<Common::UUID>)> callback) const override; + void SelectProfile(std::function<void(std::optional<Common::UUID>)> callback_) const override; signals: void MainWindowSelectProfile() const; diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp index af36f07c6..ab8cfd8ee 100644 --- a/src/yuzu/applets/software_keyboard.cpp +++ b/src/yuzu/applets/software_keyboard.cpp @@ -135,8 +135,8 @@ void QtSoftwareKeyboard::RequestText(std::function<void(std::optional<std::u16st } void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message, - std::function<void()> finished_check) const { - this->finished_check = std::move(finished_check); + std::function<void()> finished_check_) const { + finished_check = std::move(finished_check_); emit 
MainWindowTextCheckDialog(error_message); } diff --git a/src/yuzu/applets/software_keyboard.h b/src/yuzu/applets/software_keyboard.h index 44bcece75..9e1094cce 100644 --- a/src/yuzu/applets/software_keyboard.h +++ b/src/yuzu/applets/software_keyboard.h @@ -61,7 +61,7 @@ public: void RequestText(std::function<void(std::optional<std::u16string>)> out, Core::Frontend::SoftwareKeyboardParameters parameters) const override; void SendTextCheckDialog(std::u16string error_message, - std::function<void()> finished_check) const override; + std::function<void()> finished_check_) const override; signals: void MainWindowGetText(Core::Frontend::SoftwareKeyboardParameters parameters) const; diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp index 33f1c385d..e482ba029 100644 --- a/src/yuzu/applets/web_browser.cpp +++ b/src/yuzu/applets/web_browser.cpp @@ -1,115 +1,414 @@ -// Copyright 2018 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <mutex> - +#ifdef YUZU_USE_QT_WEB_ENGINE #include <QKeyEvent> -#include "core/hle/lock.h" +#include <QWebEngineProfile> +#include <QWebEngineScript> +#include <QWebEngineScriptCollection> +#include <QWebEngineSettings> +#include <QWebEngineUrlScheme> +#endif + +#include "common/file_util.h" +#include "core/core.h" +#include "core/frontend/input_interpreter.h" +#include "input_common/keyboard.h" +#include "input_common/main.h" #include "yuzu/applets/web_browser.h" +#include "yuzu/applets/web_browser_scripts.h" #include "yuzu/main.h" +#include "yuzu/util/url_request_interceptor.h" #ifdef YUZU_USE_QT_WEB_ENGINE -constexpr char NX_SHIM_INJECT_SCRIPT[] = R"( - window.nx = {}; - window.nx.playReport = {}; - window.nx.playReport.setCounterSetIdentifier = function () { - console.log("nx.playReport.setCounterSetIdentifier called - unimplemented"); - }; +namespace { - window.nx.playReport.incrementCounter = function () { - console.log("nx.playReport.incrementCounter called - unimplemented"); - }; +constexpr int HIDButtonToKey(HIDButton button) { + switch (button) { + case HIDButton::DLeft: + case HIDButton::LStickLeft: + return Qt::Key_Left; + case HIDButton::DUp: + case HIDButton::LStickUp: + return Qt::Key_Up; + case HIDButton::DRight: + case HIDButton::LStickRight: + return Qt::Key_Right; + case HIDButton::DDown: + case HIDButton::LStickDown: + return Qt::Key_Down; + default: + return 0; + } +} + +} // Anonymous namespace + +QtNXWebEngineView::QtNXWebEngineView(QWidget* parent, Core::System& system, + InputCommon::InputSubsystem* input_subsystem_) + : QWebEngineView(parent), input_subsystem{input_subsystem_}, + url_interceptor(std::make_unique<UrlRequestInterceptor>()), + input_interpreter(std::make_unique<InputInterpreter>(system)), + default_profile{QWebEngineProfile::defaultProfile()}, + global_settings{QWebEngineSettings::globalSettings()} { + QWebEngineScript gamepad; + QWebEngineScript window_nx; + + gamepad.setName(QStringLiteral("gamepad_script.js")); + 
window_nx.setName(QStringLiteral("window_nx_script.js")); + + gamepad.setSourceCode(QString::fromStdString(GAMEPAD_SCRIPT)); + window_nx.setSourceCode(QString::fromStdString(WINDOW_NX_SCRIPT)); + + gamepad.setInjectionPoint(QWebEngineScript::DocumentCreation); + window_nx.setInjectionPoint(QWebEngineScript::DocumentCreation); + + gamepad.setWorldId(QWebEngineScript::MainWorld); + window_nx.setWorldId(QWebEngineScript::MainWorld); + + gamepad.setRunsOnSubFrames(true); + window_nx.setRunsOnSubFrames(true); + + default_profile->scripts()->insert(gamepad); + default_profile->scripts()->insert(window_nx); + + default_profile->setRequestInterceptor(url_interceptor.get()); + + global_settings->setAttribute(QWebEngineSettings::LocalContentCanAccessRemoteUrls, true); + global_settings->setAttribute(QWebEngineSettings::FullScreenSupportEnabled, true); + global_settings->setAttribute(QWebEngineSettings::AllowRunningInsecureContent, true); + global_settings->setAttribute(QWebEngineSettings::FocusOnNavigationEnabled, true); + global_settings->setAttribute(QWebEngineSettings::AllowWindowActivationFromJavaScript, true); + global_settings->setAttribute(QWebEngineSettings::ShowScrollBars, false); + + global_settings->setFontFamily(QWebEngineSettings::StandardFont, QStringLiteral("Roboto")); + + connect( + page(), &QWebEnginePage::windowCloseRequested, page(), + [this] { + if (page()->url() == url_interceptor->GetRequestedURL()) { + SetFinished(true); + SetExitReason(Service::AM::Applets::WebExitReason::WindowClosed); + } + }, + Qt::QueuedConnection); +} + +QtNXWebEngineView::~QtNXWebEngineView() { + SetFinished(true); + StopInputThread(); +} + +void QtNXWebEngineView::LoadLocalWebPage(std::string_view main_url, + std::string_view additional_args) { + is_local = true; + + LoadExtractedFonts(); + SetUserAgent(UserAgent::WebApplet); + SetFinished(false); + SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed); + SetLastURL("http://localhost/"); + StartInputThread(); + + 
load(QUrl(QUrl::fromLocalFile(QString::fromStdString(std::string(main_url))).toString() + + QString::fromStdString(std::string(additional_args)))); +} + +void QtNXWebEngineView::LoadExternalWebPage(std::string_view main_url, + std::string_view additional_args) { + is_local = false; + + SetUserAgent(UserAgent::WebApplet); + SetFinished(false); + SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed); + SetLastURL("http://localhost/"); + StartInputThread(); + + load(QUrl(QString::fromStdString(std::string(main_url)) + + QString::fromStdString(std::string(additional_args)))); +} + +void QtNXWebEngineView::SetUserAgent(UserAgent user_agent) { + const QString user_agent_str = [user_agent] { + switch (user_agent) { + case UserAgent::WebApplet: + default: + return QStringLiteral("WebApplet"); + case UserAgent::ShopN: + return QStringLiteral("ShopN"); + case UserAgent::LoginApplet: + return QStringLiteral("LoginApplet"); + case UserAgent::ShareApplet: + return QStringLiteral("ShareApplet"); + case UserAgent::LobbyApplet: + return QStringLiteral("LobbyApplet"); + case UserAgent::WifiWebAuthApplet: + return QStringLiteral("WifiWebAuthApplet"); + } + }(); + + QWebEngineProfile::defaultProfile()->setHttpUserAgent( + QStringLiteral("Mozilla/5.0 (Nintendo Switch; %1) AppleWebKit/606.4 " + "(KHTML, like Gecko) NF/6.0.1.15.4 NintendoBrowser/5.1.0.20389") + .arg(user_agent_str)); +} + +bool QtNXWebEngineView::IsFinished() const { + return finished; +} + +void QtNXWebEngineView::SetFinished(bool finished_) { + finished = finished_; +} + +Service::AM::Applets::WebExitReason QtNXWebEngineView::GetExitReason() const { + return exit_reason; +} + +void QtNXWebEngineView::SetExitReason(Service::AM::Applets::WebExitReason exit_reason_) { + exit_reason = exit_reason_; +} + +const std::string& QtNXWebEngineView::GetLastURL() const { + return last_url; +} + +void QtNXWebEngineView::SetLastURL(std::string last_url_) { + last_url = std::move(last_url_); +} + +QString 
QtNXWebEngineView::GetCurrentURL() const { + return url_interceptor->GetRequestedURL().toString(); +} + +void QtNXWebEngineView::hide() { + SetFinished(true); + StopInputThread(); - window.nx.footer = {}; - window.nx.footer.unsetAssign = function () { - console.log("nx.footer.unsetAssign called - unimplemented"); + QWidget::hide(); +} + +void QtNXWebEngineView::keyPressEvent(QKeyEvent* event) { + if (is_local) { + input_subsystem->GetKeyboard()->PressKey(event->key()); + } +} + +void QtNXWebEngineView::keyReleaseEvent(QKeyEvent* event) { + if (is_local) { + input_subsystem->GetKeyboard()->ReleaseKey(event->key()); + } +} + +template <HIDButton... T> +void QtNXWebEngineView::HandleWindowFooterButtonPressedOnce() { + const auto f = [this](HIDButton button) { + if (input_interpreter->IsButtonPressedOnce(button)) { + page()->runJavaScript( + QStringLiteral("yuzu_key_callbacks[%1] == null;").arg(static_cast<u8>(button)), + [&](const QVariant& variant) { + if (variant.toBool()) { + switch (button) { + case HIDButton::A: + SendMultipleKeyPressEvents<Qt::Key_A, Qt::Key_Space, Qt::Key_Return>(); + break; + case HIDButton::B: + SendKeyPressEvent(Qt::Key_B); + break; + case HIDButton::X: + SendKeyPressEvent(Qt::Key_X); + break; + case HIDButton::Y: + SendKeyPressEvent(Qt::Key_Y); + break; + default: + break; + } + } + }); + + page()->runJavaScript( + QStringLiteral("if (yuzu_key_callbacks[%1] != null) { yuzu_key_callbacks[%1](); }") + .arg(static_cast<u8>(button))); + } }; - var yuzu_key_callbacks = []; - window.nx.footer.setAssign = function(key, discard1, func, discard2) { - switch (key) { - case 'A': - yuzu_key_callbacks[0] = func; - break; - case 'B': - yuzu_key_callbacks[1] = func; - break; - case 'X': - yuzu_key_callbacks[2] = func; - break; - case 'Y': - yuzu_key_callbacks[3] = func; - break; - case 'L': - yuzu_key_callbacks[6] = func; - break; - case 'R': - yuzu_key_callbacks[7] = func; - break; + (f(T), ...); +} + +template <HIDButton... 
T> +void QtNXWebEngineView::HandleWindowKeyButtonPressedOnce() { + const auto f = [this](HIDButton button) { + if (input_interpreter->IsButtonPressedOnce(button)) { + SendKeyPressEvent(HIDButtonToKey(button)); } }; - var applet_done = false; - window.nx.endApplet = function() { - applet_done = true; + (f(T), ...); +} + +template <HIDButton... T> +void QtNXWebEngineView::HandleWindowKeyButtonHold() { + const auto f = [this](HIDButton button) { + if (input_interpreter->IsButtonHeld(button)) { + SendKeyPressEvent(HIDButtonToKey(button)); + } }; - window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } }; -)"; + (f(T), ...); +} + +void QtNXWebEngineView::SendKeyPressEvent(int key) { + if (key == 0) { + return; + } + + QCoreApplication::postEvent(focusProxy(), + new QKeyEvent(QKeyEvent::KeyPress, key, Qt::NoModifier)); + QCoreApplication::postEvent(focusProxy(), + new QKeyEvent(QKeyEvent::KeyRelease, key, Qt::NoModifier)); +} + +void QtNXWebEngineView::StartInputThread() { + if (input_thread_running) { + return; + } + + input_thread_running = true; + input_thread = std::thread(&QtNXWebEngineView::InputThread, this); +} + +void QtNXWebEngineView::StopInputThread() { + if (is_local) { + QWidget::releaseKeyboard(); + } -QString GetNXShimInjectionScript() { - return QString::fromStdString(NX_SHIM_INJECT_SCRIPT); + input_thread_running = false; + if (input_thread.joinable()) { + input_thread.join(); + } } -NXInputWebEngineView::NXInputWebEngineView(QWidget* parent) : QWebEngineView(parent) {} +void QtNXWebEngineView::InputThread() { + // Wait for 1 second before allowing any inputs to be processed. 
+ std::this_thread::sleep_for(std::chrono::seconds(1)); + + if (is_local) { + QWidget::grabKeyboard(); + } + + while (input_thread_running) { + input_interpreter->PollInput(); + + HandleWindowFooterButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y, + HIDButton::L, HIDButton::R>(); + + HandleWindowKeyButtonPressedOnce<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight, + HIDButton::DDown, HIDButton::LStickLeft, + HIDButton::LStickUp, HIDButton::LStickRight, + HIDButton::LStickDown>(); -void NXInputWebEngineView::keyPressEvent(QKeyEvent* event) { - parent()->event(event); + HandleWindowKeyButtonHold<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight, + HIDButton::DDown, HIDButton::LStickLeft, HIDButton::LStickUp, + HIDButton::LStickRight, HIDButton::LStickDown>(); + + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } } -void NXInputWebEngineView::keyReleaseEvent(QKeyEvent* event) { - parent()->event(event); +void QtNXWebEngineView::LoadExtractedFonts() { + QWebEngineScript nx_font_css; + QWebEngineScript load_nx_font; + + const QString fonts_dir = QString::fromStdString(Common::FS::SanitizePath( + fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)))); + + nx_font_css.setName(QStringLiteral("nx_font_css.js")); + load_nx_font.setName(QStringLiteral("load_nx_font.js")); + + nx_font_css.setSourceCode( + QString::fromStdString(NX_FONT_CSS) + .arg(fonts_dir + QStringLiteral("/FontStandard.ttf")) + .arg(fonts_dir + QStringLiteral("/FontChineseSimplified.ttf")) + .arg(fonts_dir + QStringLiteral("/FontExtendedChineseSimplified.ttf")) + .arg(fonts_dir + QStringLiteral("/FontChineseTraditional.ttf")) + .arg(fonts_dir + QStringLiteral("/FontKorean.ttf")) + .arg(fonts_dir + QStringLiteral("/FontNintendoExtended.ttf")) + .arg(fonts_dir + QStringLiteral("/FontNintendoExtended2.ttf"))); + load_nx_font.setSourceCode(QString::fromStdString(LOAD_NX_FONT)); + + 
nx_font_css.setInjectionPoint(QWebEngineScript::DocumentReady); + load_nx_font.setInjectionPoint(QWebEngineScript::Deferred); + + nx_font_css.setWorldId(QWebEngineScript::MainWorld); + load_nx_font.setWorldId(QWebEngineScript::MainWorld); + + nx_font_css.setRunsOnSubFrames(true); + load_nx_font.setRunsOnSubFrames(true); + + default_profile->scripts()->insert(nx_font_css); + default_profile->scripts()->insert(load_nx_font); + + connect( + url_interceptor.get(), &UrlRequestInterceptor::FrameChanged, url_interceptor.get(), + [this] { + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + page()->runJavaScript(QString::fromStdString(LOAD_NX_FONT)); + }, + Qt::QueuedConnection); } #endif QtWebBrowser::QtWebBrowser(GMainWindow& main_window) { - connect(this, &QtWebBrowser::MainWindowOpenPage, &main_window, &GMainWindow::WebBrowserOpenPage, - Qt::QueuedConnection); - connect(&main_window, &GMainWindow::WebBrowserUnpackRomFS, this, - &QtWebBrowser::MainWindowUnpackRomFS, Qt::QueuedConnection); - connect(&main_window, &GMainWindow::WebBrowserFinishedBrowsing, this, - &QtWebBrowser::MainWindowFinishedBrowsing, Qt::QueuedConnection); + connect(this, &QtWebBrowser::MainWindowOpenWebPage, &main_window, + &GMainWindow::WebBrowserOpenWebPage, Qt::QueuedConnection); + connect(&main_window, &GMainWindow::WebBrowserExtractOfflineRomFS, this, + &QtWebBrowser::MainWindowExtractOfflineRomFS, Qt::QueuedConnection); + connect(&main_window, &GMainWindow::WebBrowserClosed, this, + &QtWebBrowser::MainWindowWebBrowserClosed, Qt::QueuedConnection); } QtWebBrowser::~QtWebBrowser() = default; -void QtWebBrowser::OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, - std::function<void()> finished_callback) { - this->unpack_romfs_callback = std::move(unpack_romfs_callback); - this->finished_callback = std::move(finished_callback); +void QtWebBrowser::OpenLocalWebPage( + std::string_view local_url, std::function<void()> extract_romfs_callback_, + 
std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const { + extract_romfs_callback = std::move(extract_romfs_callback_); + callback = std::move(callback_); + + const auto index = local_url.find('?'); + + if (index == std::string::npos) { + emit MainWindowOpenWebPage(local_url, "", true); + } else { + emit MainWindowOpenWebPage(local_url.substr(0, index), local_url.substr(index), true); + } +} + +void QtWebBrowser::OpenExternalWebPage( + std::string_view external_url, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const { + callback = std::move(callback_); + + const auto index = external_url.find('?'); - const auto index = url.find('?'); if (index == std::string::npos) { - emit MainWindowOpenPage(url, ""); + emit MainWindowOpenWebPage(external_url, "", false); } else { - const auto front = url.substr(0, index); - const auto back = url.substr(index); - emit MainWindowOpenPage(front, back); + emit MainWindowOpenWebPage(external_url.substr(0, index), external_url.substr(index), + false); } } -void QtWebBrowser::MainWindowUnpackRomFS() { - // Acquire the HLE mutex - std::lock_guard lock{HLE::g_hle_lock}; - unpack_romfs_callback(); +void QtWebBrowser::MainWindowExtractOfflineRomFS() { + extract_romfs_callback(); } -void QtWebBrowser::MainWindowFinishedBrowsing() { - // Acquire the HLE mutex - std::lock_guard lock{HLE::g_hle_lock}; - finished_callback(); +void QtWebBrowser::MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason, + std::string last_url) { + callback(exit_reason, last_url); } diff --git a/src/yuzu/applets/web_browser.h b/src/yuzu/applets/web_browser.h index b38437e46..47f960d69 100644 --- a/src/yuzu/applets/web_browser.h +++ b/src/yuzu/applets/web_browser.h @@ -1,10 +1,13 @@ -// Copyright 2018 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
#pragma once -#include <functional> +#include <atomic> +#include <memory> +#include <thread> + #include <QObject> #ifdef YUZU_USE_QT_WEB_ENGINE @@ -13,19 +16,172 @@ #include "core/frontend/applets/web_browser.h" +enum class HIDButton : u8; + class GMainWindow; +class InputInterpreter; +class UrlRequestInterceptor; + +namespace Core { +class System; +} + +namespace InputCommon { +class InputSubsystem; +} #ifdef YUZU_USE_QT_WEB_ENGINE -QString GetNXShimInjectionScript(); +enum class UserAgent { + WebApplet, + ShopN, + LoginApplet, + ShareApplet, + LobbyApplet, + WifiWebAuthApplet, +}; + +class QWebEngineProfile; +class QWebEngineSettings; + +class QtNXWebEngineView : public QWebEngineView { + Q_OBJECT -class NXInputWebEngineView : public QWebEngineView { public: - explicit NXInputWebEngineView(QWidget* parent = nullptr); + explicit QtNXWebEngineView(QWidget* parent, Core::System& system, + InputCommon::InputSubsystem* input_subsystem_); + ~QtNXWebEngineView() override; + + /** + * Loads a HTML document that exists locally. Cannot be used to load external websites. + * + * @param main_url The url to the file. + * @param additional_args Additional arguments appended to the main url. + */ + void LoadLocalWebPage(std::string_view main_url, std::string_view additional_args); + + /** + * Loads an external website. Cannot be used to load local urls. + * + * @param main_url The url to the website. + * @param additional_args Additional arguments appended to the main url. + */ + void LoadExternalWebPage(std::string_view main_url, std::string_view additional_args); + + /** + * Sets the background color of the web page. + * + * @param color The color to set. + */ + void SetBackgroundColor(QColor color); + + /** + * Sets the user agent of the web browser. + * + * @param user_agent The user agent enum. 
+ */ + void SetUserAgent(UserAgent user_agent); + + [[nodiscard]] bool IsFinished() const; + void SetFinished(bool finished_); + + [[nodiscard]] Service::AM::Applets::WebExitReason GetExitReason() const; + void SetExitReason(Service::AM::Applets::WebExitReason exit_reason_); + + [[nodiscard]] const std::string& GetLastURL() const; + void SetLastURL(std::string last_url_); + + /** + * This gets the current URL that has been requested by the webpage. + * This only applies to the main frame. Sub frames and other resources are ignored. + * + * @return Currently requested URL + */ + [[nodiscard]] QString GetCurrentURL() const; + +public slots: + void hide(); protected: void keyPressEvent(QKeyEvent* event) override; void keyReleaseEvent(QKeyEvent* event) override; + +private: + /** + * Handles button presses to execute functions assigned in yuzu_key_callbacks. + * yuzu_key_callbacks contains specialized functions for the buttons in the window footer + * that can be overriden by games to achieve desired functionality. + * + * @tparam HIDButton The list of buttons contained in yuzu_key_callbacks + */ + template <HIDButton... T> + void HandleWindowFooterButtonPressedOnce(); + + /** + * Handles button presses and converts them into keyboard input. + * This should only be used to convert D-Pad or Analog Stick input into arrow keys. + * + * @tparam HIDButton The list of buttons that can be converted into keyboard input. + */ + template <HIDButton... T> + void HandleWindowKeyButtonPressedOnce(); + + /** + * Handles button holds and converts them into keyboard input. + * This should only be used to convert D-Pad or Analog Stick input into arrow keys. + * + * @tparam HIDButton The list of buttons that can be converted into keyboard input. + */ + template <HIDButton... T> + void HandleWindowKeyButtonHold(); + + /** + * Sends a key press event to QWebEngineView. + * + * @param key Qt key code. 
+ */ + void SendKeyPressEvent(int key); + + /** + * Sends multiple key press events to QWebEngineView. + * + * @tparam int Qt key code. + */ + template <int... T> + void SendMultipleKeyPressEvents() { + (SendKeyPressEvent(T), ...); + } + + void StartInputThread(); + void StopInputThread(); + + /// The thread where input is being polled and processed. + void InputThread(); + + /// Loads the extracted fonts using JavaScript. + void LoadExtractedFonts(); + + InputCommon::InputSubsystem* input_subsystem; + + std::unique_ptr<UrlRequestInterceptor> url_interceptor; + + std::unique_ptr<InputInterpreter> input_interpreter; + + std::thread input_thread; + + std::atomic<bool> input_thread_running{}; + + std::atomic<bool> finished{}; + + Service::AM::Applets::WebExitReason exit_reason{ + Service::AM::Applets::WebExitReason::EndButtonPressed}; + + std::string last_url{"http://localhost/"}; + + bool is_local{}; + + QWebEngineProfile* default_profile; + QWebEngineSettings* global_settings; }; #endif @@ -34,19 +190,28 @@ class QtWebBrowser final : public QObject, public Core::Frontend::WebBrowserAppl Q_OBJECT public: - explicit QtWebBrowser(GMainWindow& main_window); + explicit QtWebBrowser(GMainWindow& parent); ~QtWebBrowser() override; - void OpenPageLocal(std::string_view url, std::function<void()> unpack_romfs_callback, - std::function<void()> finished_callback) override; + void OpenLocalWebPage(std::string_view local_url, std::function<void()> extract_romfs_callback_, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> + callback_) const override; + + void OpenExternalWebPage(std::string_view external_url, + std::function<void(Service::AM::Applets::WebExitReason, std::string)> + callback_) const override; signals: - void MainWindowOpenPage(std::string_view filename, std::string_view additional_args) const; + void MainWindowOpenWebPage(std::string_view main_url, std::string_view additional_args, + bool is_local) const; private: - void 
MainWindowUnpackRomFS(); - void MainWindowFinishedBrowsing(); + void MainWindowExtractOfflineRomFS(); + + void MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason, + std::string last_url); + + mutable std::function<void()> extract_romfs_callback; - std::function<void()> unpack_romfs_callback; - std::function<void()> finished_callback; + mutable std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback; }; diff --git a/src/yuzu/applets/web_browser_scripts.h b/src/yuzu/applets/web_browser_scripts.h new file mode 100644 index 000000000..992837a85 --- /dev/null +++ b/src/yuzu/applets/web_browser_scripts.h @@ -0,0 +1,193 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +constexpr char NX_FONT_CSS[] = R"( +(function() { + css = document.createElement('style'); + css.type = 'text/css'; + css.id = 'nx_font'; + css.innerText = ` +/* FontStandard */ +@font-face { + font-family: 'FontStandard'; + src: url('%1') format('truetype'); +} + +/* FontChineseSimplified */ +@font-face { + font-family: 'FontChineseSimplified'; + src: url('%2') format('truetype'); +} + +/* FontExtendedChineseSimplified */ +@font-face { + font-family: 'FontExtendedChineseSimplified'; + src: url('%3') format('truetype'); +} + +/* FontChineseTraditional */ +@font-face { + font-family: 'FontChineseTraditional'; + src: url('%4') format('truetype'); +} + +/* FontKorean */ +@font-face { + font-family: 'FontKorean'; + src: url('%5') format('truetype'); +} + +/* FontNintendoExtended */ +@font-face { + font-family: 'NintendoExt003'; + src: url('%6') format('truetype'); +} + +/* FontNintendoExtended2 */ +@font-face { + font-family: 'NintendoExt003'; + src: url('%7') format('truetype'); +} +`; + + document.head.appendChild(css); +})(); +)"; + +constexpr char LOAD_NX_FONT[] = R"( +(function() { + var elements = document.querySelectorAll("*"); + + for (var i = 0; i < 
elements.length; i++) { + var style = window.getComputedStyle(elements[i], null); + if (style.fontFamily.includes("Arial") || style.fontFamily.includes("Calibri") || + style.fontFamily.includes("Century") || style.fontFamily.includes("Times New Roman")) { + elements[i].style.fontFamily = "FontStandard, FontChineseSimplified, FontExtendedChineseSimplified, FontChineseTraditional, FontKorean, NintendoExt003"; + } else { + elements[i].style.fontFamily = style.fontFamily + ", FontStandard, FontChineseSimplified, FontExtendedChineseSimplified, FontChineseTraditional, FontKorean, NintendoExt003"; + } + } +})(); +)"; + +constexpr char GAMEPAD_SCRIPT[] = R"( +window.addEventListener("gamepadconnected", function(e) { + console.log("Gamepad connected at index %d: %s. %d buttons, %d axes.", + e.gamepad.index, e.gamepad.id, e.gamepad.buttons.length, e.gamepad.axes.length); +}); + +window.addEventListener("gamepaddisconnected", function(e) { + console.log("Gamepad disconnected from index %d: %s", e.gamepad.index, e.gamepad.id); +}); +)"; + +constexpr char WINDOW_NX_SCRIPT[] = R"( +var end_applet = false; +var yuzu_key_callbacks = []; + +(function() { + class WindowNX { + constructor() { + yuzu_key_callbacks[1] = function() { window.history.back(); }; + yuzu_key_callbacks[2] = function() { window.nx.endApplet(); }; + } + + addEventListener(type, listener, options) { + console.log("nx.addEventListener called, type=%s", type); + + window.addEventListener(type, listener, options); + } + + endApplet() { + console.log("nx.endApplet called"); + + end_applet = true; + } + + playSystemSe(system_se) { + console.log("nx.playSystemSe is not implemented, system_se=%s", system_se); + } + + sendMessage(message) { + console.log("nx.sendMessage is not implemented, message=%s", message); + } + + setCursorScrollSpeed(scroll_speed) { + console.log("nx.setCursorScrollSpeed is not implemented, scroll_speed=%d", scroll_speed); + } + } + + class WindowNXFooter { + setAssign(key, label, func, option) { 
+ console.log("nx.footer.setAssign called, key=%s", key); + + switch (key) { + case "A": + yuzu_key_callbacks[0] = func; + break; + case "B": + yuzu_key_callbacks[1] = func; + break; + case "X": + yuzu_key_callbacks[2] = func; + break; + case "Y": + yuzu_key_callbacks[3] = func; + break; + case "L": + yuzu_key_callbacks[6] = func; + break; + case "R": + yuzu_key_callbacks[7] = func; + break; + } + } + + setFixed(kind) { + console.log("nx.footer.setFixed is not implemented, kind=%s", kind); + } + + unsetAssign(key) { + console.log("nx.footer.unsetAssign called, key=%s", key); + + switch (key) { + case "A": + yuzu_key_callbacks[0] = function() {}; + break; + case "B": + yuzu_key_callbacks[1] = function() {}; + break; + case "X": + yuzu_key_callbacks[2] = function() {}; + break; + case "Y": + yuzu_key_callbacks[3] = function() {}; + break; + case "L": + yuzu_key_callbacks[6] = function() {}; + break; + case "R": + yuzu_key_callbacks[7] = function() {}; + break; + } + } + } + + class WindowNXPlayReport { + incrementCounter(counter_id) { + console.log("nx.playReport.incrementCounter is not implemented, counter_id=%d", counter_id); + } + + setCounterSetIdentifier(counter_id) { + console.log("nx.playReport.setCounterSetIdentifier is not implemented, counter_id=%d", counter_id); + } + } + + window.nx = new WindowNX(); + window.nx.footer = new WindowNXFooter(); + window.nx.playReport = new WindowNXPlayReport(); +})(); +)"; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 489104d5f..e124836b5 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -19,7 +19,7 @@ #include <QOpenGLContext> #endif -#if !defined(WIN32) && HAS_VULKAN +#if !defined(WIN32) #include <qpa/qplatformnativeinterface.h> #endif @@ -241,14 +241,12 @@ private: std::unique_ptr<Core::Frontend::GraphicsContext> context; }; -#ifdef HAS_VULKAN class VulkanRenderWidget : public RenderWidget { public: explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { 
windowHandle()->setSurfaceType(QWindow::VulkanSurface); } }; -#endif static Core::Frontend::WindowSystemType GetWindowSystemType() { // Determine WSI type based on Qt platform. @@ -268,7 +266,6 @@ static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* Core::Frontend::EmuWindow::WindowSystemInfo wsi; wsi.type = GetWindowSystemType(); -#ifdef HAS_VULKAN // Our Win32 Qt external doesn't have the private API. #if defined(WIN32) || defined(__APPLE__) wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; @@ -281,7 +278,6 @@ static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; #endif wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; -#endif return wsi; } @@ -569,6 +565,10 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_p layout); } +bool GRenderWindow::IsLoadingComplete() const { + return first_frame; +} + void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) { setMinimumSize(minimal_size.first, minimal_size.second); } @@ -594,18 +594,12 @@ bool GRenderWindow::InitializeOpenGL() { } bool GRenderWindow::InitializeVulkan() { -#ifdef HAS_VULKAN auto child = new VulkanRenderWidget(this); child_widget = child; child_widget->windowHandle()->create(); main_context = std::make_unique<DummyContext>(); return true; -#else - QMessageBox::critical(this, tr("Vulkan not available!"), - tr("yuzu has not been compiled with Vulkan support.")); - return false; -#endif } bool GRenderWindow::LoadOpenGL() { diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index a6d788d40..ebe5cb965 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -162,6 +162,8 @@ public: /// Destroy the previous run's child_widget which should also destroy the child_window void ReleaseRenderTarget(); + bool IsLoadingComplete() 
const; + void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index fcc38b3af..34c2a5f8b 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -511,6 +511,9 @@ void Config::ReadControlValues() { ReadTouchscreenValues(); ReadMotionTouchValues(); + Settings::values.emulate_analog_keyboard = + ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); + ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false); ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), true); @@ -634,8 +637,6 @@ void Config::ReadDebuggingValues() { // Intentionally not using the QT default setting as this is intended to be changed in the ini Settings::values.record_frame_times = qt_config->value(QStringLiteral("record_frame_times"), false).toBool(); - Settings::values.use_gdbstub = ReadSetting(QStringLiteral("use_gdbstub"), false).toBool(); - Settings::values.gdbstub_port = ReadSetting(QStringLiteral("gdbstub_port"), 24689).toInt(); Settings::values.program_args = ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString(); Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool(); @@ -1186,6 +1187,8 @@ void Config::SaveControlValues() { QString::fromStdString(Settings::values.touch_device), QStringLiteral("engine:emu_window")); WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); + WriteSetting(QStringLiteral("emulate_analog_keyboard"), + Settings::values.emulate_analog_keyboard, false); qt_config->endGroup(); } @@ -1231,8 +1234,6 @@ void Config::SaveDebuggingValues() { // Intentionally not using the QT default setting as this is intended to be changed in the ini qt_config->setValue(QStringLiteral("record_frame_times"), 
Settings::values.record_frame_times); - WriteSetting(QStringLiteral("use_gdbstub"), Settings::values.use_gdbstub, false); - WriteSetting(QStringLiteral("gdbstub_port"), Settings::values.gdbstub_port, 24689); WriteSetting(QStringLiteral("program_args"), QString::fromStdString(Settings::values.program_args), QString{}); WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); @@ -1588,14 +1589,12 @@ void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool void Config::Reload() { ReadValues(); - Settings::Sanitize(); // To apply default value changes SaveValues(); Settings::Apply(Core::System::GetInstance()); } void Config::Save() { - Settings::Sanitize(); SaveValues(); } diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 027099ab7..121873f95 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -28,9 +28,6 @@ ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::Co ConfigureDebug::~ConfigureDebug() = default; void ConfigureDebug::SetConfiguration() { - ui->toggle_gdbstub->setChecked(Settings::values.use_gdbstub); - ui->gdbport_spinbox->setEnabled(Settings::values.use_gdbstub); - ui->gdbport_spinbox->setValue(Settings::values.gdbstub_port); ui->toggle_console->setEnabled(!Core::System::GetInstance().IsPoweredOn()); ui->toggle_console->setChecked(UISettings::values.show_console); ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter)); @@ -45,8 +42,6 @@ void ConfigureDebug::SetConfiguration() { } void ConfigureDebug::ApplyConfiguration() { - Settings::values.use_gdbstub = ui->toggle_gdbstub->isChecked(); - Settings::values.gdbstub_port = ui->gdbport_spinbox->value(); UISettings::values.show_console = ui->toggle_console->isChecked(); Settings::values.log_filter = ui->log_filter_edit->text().toStdString(); Settings::values.program_args = 
ui->homebrew_args_edit->text().toStdString(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 6f94fe304..9186aa732 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -7,7 +7,7 @@ <x>0</x> <y>0</y> <width>400</width> - <height>467</height> + <height>486</height> </rect> </property> <property name="windowTitle"> @@ -15,57 +15,6 @@ </property> <layout class="QVBoxLayout" name="verticalLayout_1"> <item> - <layout class="QVBoxLayout" name="verticalLayout_2"> - <item> - <widget class="QGroupBox" name="groupBox"> - <property name="title"> - <string>GDB</string> - </property> - <layout class="QVBoxLayout" name="verticalLayout_3"> - <item> - <layout class="QHBoxLayout" name="horizontalLayout_1"> - <item> - <widget class="QCheckBox" name="toggle_gdbstub"> - <property name="text"> - <string>Enable GDB Stub</string> - </property> - </widget> - </item> - <item> - <spacer name="horizontalSpacer"> - <property name="orientation"> - <enum>Qt::Horizontal</enum> - </property> - <property name="sizeHint" stdset="0"> - <size> - <width>40</width> - <height>20</height> - </size> - </property> - </spacer> - </item> - <item> - <widget class="QLabel" name="label_1"> - <property name="text"> - <string>Port:</string> - </property> - </widget> - </item> - <item> - <widget class="QSpinBox" name="gdbport_spinbox"> - <property name="maximum"> - <number>65536</number> - </property> - </widget> - </item> - </layout> - </item> - </layout> - </widget> - </item> - </layout> - </item> - <item> <widget class="QGroupBox" name="groupBox_2"> <property name="title"> <string>Logging</string> @@ -258,8 +207,6 @@ </layout> </widget> <tabstops> - <tabstop>toggle_gdbstub</tabstop> - <tabstop>gdbport_spinbox</tabstop> <tabstop>log_filter_edit</tabstop> <tabstop>toggle_console</tabstop> <tabstop>open_log_button</tabstop> @@ -269,22 +216,5 @@ <tabstop>quest_flag</tabstop> </tabstops> <resources/> - 
<connections> - <connection> - <sender>toggle_gdbstub</sender> - <signal>toggled(bool)</signal> - <receiver>gdbport_spinbox</receiver> - <slot>setEnabled(bool)</slot> - <hints> - <hint type="sourcelabel"> - <x>84</x> - <y>157</y> - </hint> - <hint type="destinationlabel"> - <x>342</x> - <y>158</y> - </hint> - </hints> - </connection> - </connections> + <connections/> </ui> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 6fda0ce35..b78a5dff0 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -4,22 +4,17 @@ #include <QColorDialog> #include <QComboBox> -#ifdef HAS_VULKAN #include <QVulkanInstance> -#endif #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_graphics.h" -#ifdef HAS_VULKAN -#include "video_core/renderer_vulkan/renderer_vulkan.h" -#endif - ConfigureGraphics::ConfigureGraphics(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureGraphics) { vulkan_device = Settings::values.vulkan_device.GetValue(); @@ -218,12 +213,10 @@ void ConfigureGraphics::UpdateDeviceComboBox() { } void ConfigureGraphics::RetrieveVulkanDevices() { -#ifdef HAS_VULKAN vulkan_devices.clear(); - for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { + for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { vulkan_devices.push_back(QString::fromStdString(name)); } -#endif } Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp index abaf03630..4e557bc6f 100644 --- a/src/yuzu/configuration/configure_input_advanced.cpp +++ 
b/src/yuzu/configuration/configure_input_advanced.cpp @@ -121,6 +121,7 @@ void ConfigureInputAdvanced::ApplyConfiguration() { Settings::values.debug_pad_enabled = ui->debug_enabled->isChecked(); Settings::values.mouse_enabled = ui->mouse_enabled->isChecked(); Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked(); + Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked(); Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); } @@ -147,6 +148,7 @@ void ConfigureInputAdvanced::LoadConfiguration() { ui->debug_enabled->setChecked(Settings::values.debug_pad_enabled); ui->mouse_enabled->setChecked(Settings::values.mouse_enabled); ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled); + ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard); ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); UpdateUIEnabled(); diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui index a880a7c68..f207e5d3b 100644 --- a/src/yuzu/configuration/configure_input_advanced.ui +++ b/src/yuzu/configuration/configure_input_advanced.ui @@ -2546,14 +2546,27 @@ </property> </widget> </item> - <item row="4" column="2"> + <item row="1" column="0"> + <widget class="QCheckBox" name="emulate_analog_keyboard"> + <property name="minimumSize"> + <size> + <width>0</width> + <height>23</height> + </size> + </property> + <property name="text"> + <string>Emulate Analog with Keyboard Input</string> + </property> + </widget> + </item> + <item row="5" column="2"> <widget class="QPushButton" name="touchscreen_advanced"> <property name="text"> <string>Advanced</string> </property> </widget> </item> - <item row="1" column="1"> + <item row="2" column="1"> <spacer name="horizontalSpacer_8"> <property name="orientation"> <enum>Qt::Horizontal</enum> @@ -2569,21 +2582,21 @@ </property> </spacer> </item> - <item row="1" column="2"> + 
<item row="2" column="2"> <widget class="QPushButton" name="mouse_advanced"> <property name="text"> <string>Advanced</string> </property> </widget> </item> - <item row="4" column="0"> + <item row="5" column="0"> <widget class="QCheckBox" name="touchscreen_enabled"> <property name="text"> <string>Touchscreen</string> </property> </widget> </item> - <item row="1" column="0"> + <item row="2" column="0"> <widget class="QCheckBox" name="mouse_enabled"> <property name="minimumSize"> <size> @@ -2596,28 +2609,28 @@ </property> </widget> </item> - <item row="6" column="0"> + <item row="7" column="0"> <widget class="QLabel" name="motion_touch"> <property name="text"> <string>Motion / Touch</string> </property> </widget> </item> - <item row="6" column="2"> + <item row="7" column="2"> <widget class="QPushButton" name="buttonMotionTouch"> <property name="text"> <string>Configure</string> </property> </widget> </item> - <item row="5" column="0"> + <item row="6" column="0"> <widget class="QCheckBox" name="debug_enabled"> <property name="text"> <string>Debug Controller</string> </property> </widget> </item> - <item row="5" column="2"> + <item row="6" column="2"> <widget class="QPushButton" name="debug_configure"> <property name="text"> <string>Configure</string> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index f9915fb7a..3c7500ee3 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -173,61 +173,31 @@ QString AnalogToText(const Common::ParamPackage& param, const std::string& dir) return ButtonToText(Common::ParamPackage{param.Get(dir, "")}); } - if (param.Get("engine", "") == "sdl") { + const auto engine_str = param.Get("engine", ""); + const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); + const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); + const bool invert_x = param.Get("invert_x", "+") == 
"-"; + const bool invert_y = param.Get("invert_y", "+") == "-"; + if (engine_str == "sdl" || engine_str == "gcpad" || engine_str == "mouse") { if (dir == "modifier") { return QObject::tr("[unused]"); } - if (dir == "left" || dir == "right") { - const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); - - return QObject::tr("Axis %1").arg(axis_x_str); - } - - if (dir == "up" || dir == "down") { - const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); - - return QObject::tr("Axis %1").arg(axis_y_str); + if (dir == "left") { + const QString invert_x_str = QString::fromStdString(invert_x ? "+" : "-"); + return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str); } - - return {}; - } - - if (param.Get("engine", "") == "gcpad") { - if (dir == "modifier") { - return QObject::tr("[unused]"); + if (dir == "right") { + const QString invert_x_str = QString::fromStdString(invert_x ? "-" : "+"); + return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str); } - - if (dir == "left" || dir == "right") { - const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); - - return QObject::tr("GC Axis %1").arg(axis_x_str); + if (dir == "up") { + const QString invert_y_str = QString::fromStdString(invert_y ? 
"-" : "+"); + return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str); } - - if (dir == "up" || dir == "down") { - const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); - - return QObject::tr("GC Axis %1").arg(axis_y_str); - } - - return {}; - } - - if (param.Get("engine", "") == "mouse") { - if (dir == "modifier") { - return QObject::tr("[unused]"); - } - - if (dir == "left" || dir == "right") { - const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); - - return QObject::tr("Mouse %1").arg(axis_x_str); - } - - if (dir == "up" || dir == "down") { - const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); - - return QObject::tr("Mouse %1").arg(axis_y_str); + if (dir == "down") { + const QString invert_y_str = QString::fromStdString(invert_y ? "+" : "-"); + return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str); } return {}; @@ -396,6 +366,25 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i analogs_param[analog_id].Clear(); analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]")); }); + context_menu.addAction(tr("Invert axis"), [&] { + if (sub_button_id == 2 || sub_button_id == 3) { + const bool invert_value = + analogs_param[analog_id].Get("invert_x", "+") == "-"; + const std::string invert_str = invert_value ? "+" : "-"; + analogs_param[analog_id].Set("invert_x", invert_str); + } + if (sub_button_id == 0 || sub_button_id == 1) { + const bool invert_value = + analogs_param[analog_id].Get("invert_y", "+") == "-"; + const std::string invert_str = invert_value ? 
"+" : "-"; + analogs_param[analog_id].Set("invert_y", invert_str); + } + for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM; + ++sub_button_id) { + analog_map_buttons[analog_id][sub_button_id]->setText(AnalogToText( + analogs_param[analog_id], analog_sub_buttons[sub_button_id])); + } + }); context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( menu_location)); }); diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp index 2afac591a..c2a7113da 100644 --- a/src/yuzu/configuration/configure_motion_touch.cpp +++ b/src/yuzu/configuration/configure_motion_touch.cpp @@ -183,8 +183,8 @@ void ConfigureMotionTouch::ConnectEvents() { } void ConfigureMotionTouch::OnUDPAddServer() { - QRegExp re(tr("^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[" - "0-9][0-9]?)$")); // a valid ip address + QRegExp re(tr(R"re(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4]" + "[0-9]|[01]?[0-9][0-9]?)$)re")); // a valid ip address bool ok; QString port_text = ui->udp_port->text(); QString server_text = ui->udp_server->text(); diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 0e26f765b..efdc6aa50 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -48,7 +48,7 @@ private: MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Dialog) { setObjectName(QStringLiteral("MicroProfile")); - setWindowTitle(tr("MicroProfile")); + setWindowTitle(tr("&MicroProfile")); resize(1000, 600); // Remove the "?" 
button from the titlebar and enable the maximize button setWindowFlags((windowFlags() & ~Qt::WindowContextHelpButtonHint) | diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index a20824719..0925c10b4 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -13,10 +13,10 @@ #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/readable_event.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/kernel/thread.h" #include "core/memory.h" @@ -101,7 +101,7 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() }; const auto& system = Core::System::GetInstance(); - add_threads(system.GlobalScheduler().GetThreadList()); + add_threads(system.GlobalSchedulerContext().GetThreadList()); return item_list; } @@ -349,14 +349,14 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor))); list.push_back( std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.GetIdealCore()))); - list.push_back( - std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.GetAffinityMask()))); + list.push_back(std::make_unique<WaitTreeText>( + tr("affinity mask = %1").arg(thread.GetAffinityMask().GetAffinityMask()))); list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadID()))); list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)") .arg(thread.GetPriority()) .arg(thread.GetNominalPriority()))); list.push_back(std::make_unique<WaitTreeText>( - tr("last running ticks = %1").arg(thread.GetLastRunningTicks()))); + tr("last running ticks = %1").arg(thread.GetLastScheduledTick()))); 
const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); if (mutex_wait_address != 0) { @@ -457,7 +457,7 @@ void WaitTreeModel::InitItems() { thread_items = WaitTreeItem::MakeThreadItemList(); } -WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("Wait Tree"), parent) { +WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("&Wait Tree"), parent) { setObjectName(QStringLiteral("WaitTreeWidget")); view = new QTreeView(this); view->setHeaderHidden(true); diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 248855aff..df935022d 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h @@ -174,7 +174,8 @@ public: } bool operator<(const QStandardItem& other) const override { - return data(CompatNumberRole) < other.data(CompatNumberRole); + return data(CompatNumberRole).value<QString>() < + other.data(CompatNumberRole).value<QString>(); } }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 26f5e42ed..ab66d7f93 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -28,8 +28,6 @@ #include "core/hle/service/am/applet_ae.h" #include "core/hle/service/am/applet_oe.h" #include "core/hle/service/am/applets/applets.h" -#include "core/hle/service/hid/controllers/npad.h" -#include "core/hle/service/hid/hid.h" // These are wrappers to avoid the calls to CreateDirectory and CreateFile because of the Windows // defines. 
@@ -83,6 +81,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "core/core.h" #include "core/crypto/key_manager.h" #include "core/file_sys/card_image.h" +#include "core/file_sys/common_funcs.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" @@ -124,14 +123,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "yuzu/discord_impl.h" #endif -#ifdef YUZU_USE_QT_WEB_ENGINE -#include <QWebEngineProfile> -#include <QWebEngineScript> -#include <QWebEngineScriptCollection> -#include <QWebEngineSettings> -#include <QWebEngineView> -#endif - #ifdef QT_STATICPLUGIN Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); #endif @@ -148,8 +139,6 @@ __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; constexpr int default_mouse_timeout = 2500; -constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; - /** * "Callouts" are one-time instructional messages shown to the user. 
In the config settings, there * is a bitfield "callout_flags" options, used to track if a message has already been shown to the @@ -191,6 +180,30 @@ static void InitializeLogging() { #endif } +static void RemoveCachedContents() { + const auto offline_fonts = Common::FS::SanitizePath( + fmt::format("{}/fonts", Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)), + Common::FS::DirectorySeparator::PlatformDefault); + + const auto offline_manual = Common::FS::SanitizePath( + fmt::format("{}/offline_web_applet_manual", + Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)), + Common::FS::DirectorySeparator::PlatformDefault); + const auto offline_legal_information = Common::FS::SanitizePath( + fmt::format("{}/offline_web_applet_legal_information", + Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)), + Common::FS::DirectorySeparator::PlatformDefault); + const auto offline_system_data = Common::FS::SanitizePath( + fmt::format("{}/offline_web_applet_system_data", + Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)), + Common::FS::DirectorySeparator::PlatformDefault); + + Common::FS::DeleteDirRecursively(offline_fonts); + Common::FS::DeleteDirRecursively(offline_manual); + Common::FS::DeleteDirRecursively(offline_legal_information); + Common::FS::DeleteDirRecursively(offline_system_data); +} + GMainWindow::GMainWindow() : input_subsystem{std::make_shared<InputCommon::InputSubsystem>()}, config{std::make_unique<Config>()}, vfs{std::make_shared<FileSys::RealVfsFilesystem>()}, @@ -259,6 +272,9 @@ GMainWindow::GMainWindow() FileSys::ContentProviderUnionSlot::FrontendManual, provider.get()); Core::System::GetInstance().GetFileSystemController().CreateFactories(*vfs); + // Remove cached contents generated during the previous session + RemoveCachedContents(); + // Gen keys if necessary OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); @@ -350,150 +366,141 @@ void GMainWindow::SoftwareKeyboardInvokeCheckDialog(std::u16string error_message emit 
SoftwareKeyboardFinishedCheckDialog(); } +void GMainWindow::WebBrowserOpenWebPage(std::string_view main_url, std::string_view additional_args, + bool is_local) { #ifdef YUZU_USE_QT_WEB_ENGINE -void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view additional_args) { - NXInputWebEngineView web_browser_view(this); + if (disable_web_applet) { + emit WebBrowserClosed(Service::AM::Applets::WebExitReason::WindowClosed, + "http://localhost/"); + return; + } - // Scope to contain the QProgressDialog for initialization - { - QProgressDialog progress(this); - progress.setMinimumDuration(200); - progress.setLabelText(tr("Loading Web Applet...")); - progress.setRange(0, 4); - progress.setValue(0); - progress.show(); + QtNXWebEngineView web_browser_view(this, Core::System::GetInstance(), input_subsystem.get()); - auto future = QtConcurrent::run([this] { emit WebBrowserUnpackRomFS(); }); + ui.action_Pause->setEnabled(false); + ui.action_Restart->setEnabled(false); + ui.action_Stop->setEnabled(false); - while (!future.isFinished()) - QApplication::processEvents(); + { + QProgressDialog loading_progress(this); + loading_progress.setLabelText(tr("Loading Web Applet...")); + loading_progress.setRange(0, 3); + loading_progress.setValue(0); + + if (is_local && !Common::FS::Exists(std::string(main_url))) { + loading_progress.show(); - progress.setValue(1); + auto future = QtConcurrent::run([this] { emit WebBrowserExtractOfflineRomFS(); }); - // Load the special shim script to handle input and exit. 
- QWebEngineScript nx_shim; - nx_shim.setSourceCode(GetNXShimInjectionScript()); - nx_shim.setWorldId(QWebEngineScript::MainWorld); - nx_shim.setName(QStringLiteral("nx_inject.js")); - nx_shim.setInjectionPoint(QWebEngineScript::DocumentCreation); - nx_shim.setRunsOnSubFrames(true); - web_browser_view.page()->profile()->scripts()->insert(nx_shim); + while (!future.isFinished()) { + QCoreApplication::processEvents(); + } + } - web_browser_view.load( - QUrl(QUrl::fromLocalFile(QString::fromStdString(std::string(filename))).toString() + - QString::fromStdString(std::string(additional_args)))); + loading_progress.setValue(1); - progress.setValue(2); + if (is_local) { + web_browser_view.LoadLocalWebPage(main_url, additional_args); + } else { + web_browser_view.LoadExternalWebPage(main_url, additional_args); + } - render_window->hide(); - web_browser_view.setFocus(); + if (render_window->IsLoadingComplete()) { + render_window->hide(); + } const auto& layout = render_window->GetFramebufferLayout(); web_browser_view.resize(layout.screen.GetWidth(), layout.screen.GetHeight()); web_browser_view.move(layout.screen.left, layout.screen.top + menuBar()->height()); web_browser_view.setZoomFactor(static_cast<qreal>(layout.screen.GetWidth()) / - Layout::ScreenUndocked::Width); - web_browser_view.settings()->setAttribute( - QWebEngineSettings::LocalContentCanAccessRemoteUrls, true); + static_cast<qreal>(Layout::ScreenUndocked::Width)); + web_browser_view.setFocus(); web_browser_view.show(); - progress.setValue(3); + loading_progress.setValue(2); - QApplication::processEvents(); + QCoreApplication::processEvents(); - progress.setValue(4); + loading_progress.setValue(3); } - bool finished = false; - QAction* exit_action = new QAction(tr("Exit Web Applet"), this); - connect(exit_action, &QAction::triggered, this, [&finished] { finished = true; }); - ui.menubar->addAction(exit_action); + bool exit_check = false; - auto& npad = - Core::System::GetInstance() - .ServiceManager() - 
.GetService<Service::HID::Hid>("hid") - ->GetAppletResource() - ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad); - - const auto fire_js_keypress = [&web_browser_view](u32 key_code) { - web_browser_view.page()->runJavaScript( - QStringLiteral("document.dispatchEvent(new KeyboardEvent('keydown', {'key': %1}));") - .arg(key_code)); - }; + // TODO (Morph): Remove this + QAction* exit_action = new QAction(tr("Disable Web Applet"), this); + connect(exit_action, &QAction::triggered, this, [this, &web_browser_view] { + const auto result = QMessageBox::warning( + this, tr("Disable Web Applet"), + tr("Disabling the web applet will cause it to not be shown again for the rest of the " + "emulated session. This can lead to undefined behavior and should only be used with " + "Super Mario 3D All-Stars. Are you sure you want to disable the web applet?"), + QMessageBox::Yes | QMessageBox::No); + if (result == QMessageBox::Yes) { + disable_web_applet = true; + web_browser_view.SetFinished(true); + } + }); + ui.menubar->addAction(exit_action); - QMessageBox::information( - this, tr("Exit"), - tr("To exit the web application, use the game provided controls to select exit, select the " - "'Exit Web Applet' option in the menu bar, or press the 'Enter' key.")); - - bool running_exit_check = false; - while (!finished) { - QApplication::processEvents(); - - if (!running_exit_check) { - web_browser_view.page()->runJavaScript(QStringLiteral("applet_done;"), - [&](const QVariant& res) { - running_exit_check = false; - if (res.toBool()) - finished = true; - }); - running_exit_check = true; + while (!web_browser_view.IsFinished()) { + QCoreApplication::processEvents(); + + if (!exit_check) { + web_browser_view.page()->runJavaScript( + QStringLiteral("end_applet;"), [&](const QVariant& variant) { + exit_check = false; + if (variant.toBool()) { + web_browser_view.SetFinished(true); + web_browser_view.SetExitReason( + 
Service::AM::Applets::WebExitReason::EndButtonPressed); + } + }); + + exit_check = true; } - const auto input = npad.GetAndResetPressState(); - for (std::size_t i = 0; i < Settings::NativeButton::NumButtons; ++i) { - if ((input & (1 << i)) != 0) { - LOG_DEBUG(Frontend, "firing input for button id={:02X}", i); - web_browser_view.page()->runJavaScript( - QStringLiteral("yuzu_key_callbacks[%1]();").arg(i)); + if (web_browser_view.GetCurrentURL().contains(QStringLiteral("localhost"))) { + if (!web_browser_view.IsFinished()) { + web_browser_view.SetFinished(true); + web_browser_view.SetExitReason(Service::AM::Applets::WebExitReason::CallbackURL); } + + web_browser_view.SetLastURL(web_browser_view.GetCurrentURL().toStdString()); } - if (input & 0x00888000) // RStick Down | LStick Down | DPad Down - fire_js_keypress(40); // Down Arrow Key - else if (input & 0x00444000) // RStick Right | LStick Right | DPad Right - fire_js_keypress(39); // Right Arrow Key - else if (input & 0x00222000) // RStick Up | LStick Up | DPad Up - fire_js_keypress(38); // Up Arrow Key - else if (input & 0x00111000) // RStick Left | LStick Left | DPad Left - fire_js_keypress(37); // Left Arrow Key - else if (input & 0x00000001) // A Button - fire_js_keypress(13); // Enter Key + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + const auto exit_reason = web_browser_view.GetExitReason(); + const auto last_url = web_browser_view.GetLastURL(); + web_browser_view.hide(); - render_window->show(); + render_window->setFocus(); - ui.menubar->removeAction(exit_action); - // Needed to update render window focus/show and remove menubar action - QApplication::processEvents(); - emit WebBrowserFinishedBrowsing(); -} + if (render_window->IsLoadingComplete()) { + render_window->show(); + } -#else + ui.action_Pause->setEnabled(true); + ui.action_Restart->setEnabled(true); + ui.action_Stop->setEnabled(true); -void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view additional_args) 
{ -#ifndef __linux__ - QMessageBox::warning( - this, tr("Web Applet"), - tr("This version of yuzu was built without QtWebEngine support, meaning that yuzu cannot " - "properly display the game manual or web page requested."), - QMessageBox::Ok, QMessageBox::Ok); -#endif + ui.menubar->removeAction(exit_action); - LOG_INFO(Frontend, - "(STUBBED) called - Missing QtWebEngine dependency needed to open website page at " - "'{}' with arguments '{}'!", - filename, additional_args); + QCoreApplication::processEvents(); - emit WebBrowserFinishedBrowsing(); -} + emit WebBrowserClosed(exit_reason, last_url); + +#else + + // Utilize the same fallback as the default web browser applet. + emit WebBrowserClosed(Service::AM::Applets::WebExitReason::WindowClosed, "http://localhost/"); #endif +} void GMainWindow::InitializeWidgets() { #ifdef YUZU_ENABLE_COMPATIBILITY_REPORTING @@ -573,9 +580,8 @@ void GMainWindow::InitializeWidgets() { if (emulation_running) { return; } - const bool is_async = !Settings::values.use_asynchronous_gpu_emulation.GetValue() || - Settings::values.use_multi_core.GetValue(); - Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); + Settings::values.use_asynchronous_gpu_emulation.SetValue( + !Settings::values.use_asynchronous_gpu_emulation.GetValue()); async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); Settings::Apply(Core::System::GetInstance()); }); @@ -592,16 +598,13 @@ void GMainWindow::InitializeWidgets() { return; } Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); - const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue() || - Settings::values.use_multi_core.GetValue(); - Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async); - async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 
Settings::Apply(Core::System::GetInstance()); }); multicore_status_button->setText(tr("MULTICORE")); multicore_status_button->setCheckable(true); multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); + statusBar()->insertPermanentWidget(0, multicore_status_button); statusBar()->insertPermanentWidget(0, async_status_button); @@ -615,11 +618,6 @@ void GMainWindow::InitializeWidgets() { }); renderer_status_button->toggle(); -#ifndef HAS_VULKAN - renderer_status_button->setChecked(false); - renderer_status_button->setCheckable(false); - renderer_status_button->setDisabled(true); -#else renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); connect(renderer_status_button, &QPushButton::clicked, [this] { @@ -634,7 +632,6 @@ void GMainWindow::InitializeWidgets() { Settings::Apply(Core::System::GetInstance()); }); -#endif // HAS_VULKAN statusBar()->insertPermanentWidget(0, renderer_status_button); statusBar()->setVisible(true); @@ -670,7 +667,7 @@ void GMainWindow::InitializeRecentFileMenuActions() { } ui.menu_recent_files->addSeparator(); QAction* action_clear_recent_files = new QAction(this); - action_clear_recent_files->setText(tr("Clear Recent Files")); + action_clear_recent_files->setText(tr("&Clear Recent Files")); connect(action_clear_recent_files, &QAction::triggered, this, [this] { UISettings::values.recent_files.clear(); UpdateRecentFiles(); @@ -932,7 +929,10 @@ void GMainWindow::ConnectMenuEvents() { &GMainWindow::OnDisplayTitleBars); connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar); connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible); - connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize); + connect(ui.action_Reset_Window_Size_720, &QAction::triggered, this, + &GMainWindow::ResetWindowSize720); + connect(ui.action_Reset_Window_Size_1080, 
&QAction::triggered, this, + &GMainWindow::ResetWindowSize1080); // Fullscreen connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen); @@ -994,7 +994,6 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) { system.SetAppletFrontendSet({ std::make_unique<QtControllerSelector>(*this), // Controller Selector - nullptr, // E-Commerce std::make_unique<QtErrorDisplay>(*this), // Error Display nullptr, // Parental Controls nullptr, // Photo Viewer @@ -1107,6 +1106,11 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) { ConfigureVibration::SetAllVibrationDevices(); + // Save configurations + UpdateUISettings(); + game_list->SaveInterfaceLayout(); + config->Save(); + Settings::LogSettings(); if (UISettings::values.select_user_on_boot) { @@ -1240,9 +1244,7 @@ void GMainWindow::ShutdownGame() { emu_frametime_label->setVisible(false); async_status_button->setEnabled(true); multicore_status_button->setEnabled(true); -#ifdef HAS_VULKAN renderer_status_button->setEnabled(true); -#endif emulation_running = false; @@ -1529,7 +1531,7 @@ void GMainWindow::RemoveAddOnContent(u64 program_id, const QString& entry_type) FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); for (const auto& entry : dlc_entries) { - if ((entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id) { + if (FileSys::GetBaseTitleID(entry.title_id) == program_id) { const auto res = fs_controller.GetUserNANDContents()->RemoveExistingEntry(entry.title_id) || fs_controller.GetSDMCContents()->RemoveExistingEntry(entry.title_id); @@ -2103,11 +2105,12 @@ void GMainWindow::OnStartGame() { qRegisterMetaType<std::string>("std::string"); qRegisterMetaType<std::optional<std::u16string>>("std::optional<std::u16string>"); qRegisterMetaType<std::string_view>("std::string_view"); + qRegisterMetaType<Service::AM::Applets::WebExitReason>("Service::AM::Applets::WebExitReason"); connect(emu_thread.get(), &EmuThread::ErrorThrown, this, 
&GMainWindow::OnCoreError); ui.action_Start->setEnabled(false); - ui.action_Start->setText(tr("Continue")); + ui.action_Start->setText(tr("&Continue")); ui.action_Pause->setEnabled(true); ui.action_Stop->setEnabled(true); @@ -2251,7 +2254,7 @@ void GMainWindow::ToggleWindowMode() { } } -void GMainWindow::ResetWindowSize() { +void GMainWindow::ResetWindowSize720() { const auto aspect_ratio = Layout::EmulationAspectRatio( static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()), static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width); @@ -2265,6 +2268,20 @@ void GMainWindow::ResetWindowSize() { } } +void GMainWindow::ResetWindowSize1080() { + const auto aspect_ratio = Layout::EmulationAspectRatio( + static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio.GetValue()), + static_cast<float>(Layout::ScreenDocked::Height) / Layout::ScreenDocked::Width); + if (!ui.action_Single_Window_Mode->isChecked()) { + render_window->resize(Layout::ScreenDocked::Height / aspect_ratio, + Layout::ScreenDocked::Height); + } else { + resize(Layout::ScreenDocked::Height / aspect_ratio, + Layout::ScreenDocked::Height + menuBar()->height() + + (ui.action_Show_Status_Bar->isChecked() ? 
statusBar()->height() : 0)); + } +} + void GMainWindow::OnConfigure() { const auto old_theme = UISettings::values.theme; const bool old_discord_presence = UISettings::values.enable_discord_presence; @@ -2512,14 +2529,27 @@ void GMainWindow::UpdateStatusBar() { void GMainWindow::UpdateStatusButtons() { dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); - Settings::values.use_asynchronous_gpu_emulation.SetValue( - Settings::values.use_asynchronous_gpu_emulation.GetValue() || - Settings::values.use_multi_core.GetValue()); async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); -#ifdef HAS_VULKAN renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); +} + +void GMainWindow::UpdateUISettings() { + if (!ui.action_Fullscreen->isChecked()) { + UISettings::values.geometry = saveGeometry(); + UISettings::values.renderwindow_geometry = render_window->saveGeometry(); + } + UISettings::values.state = saveState(); +#if MICROPROFILE_ENABLED + UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); + UISettings::values.microprofile_visible = microProfileDialog->isVisible(); #endif + UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); + UISettings::values.fullscreen = ui.action_Fullscreen->isChecked(); + UISettings::values.display_titlebar = ui.action_Display_Dock_Widget_Headers->isChecked(); + UISettings::values.show_filter_bar = ui.action_Show_Filter_Bar->isChecked(); + UISettings::values.show_status_bar = ui.action_Show_Status_Bar->isChecked(); + UISettings::values.first_start = false; } void GMainWindow::HideMouseCursor() { @@ -2709,7 +2739,7 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv dlc_match.reserve(dlc_entries.size()); std::copy_if(dlc_entries.begin(), dlc_entries.end(), 
std::back_inserter(dlc_match), [&program_id, &installed](const FileSys::ContentProviderEntry& entry) { - return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == program_id && + return FileSys::GetBaseTitleID(entry.title_id) == program_id && installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; }); @@ -2755,22 +2785,7 @@ void GMainWindow::closeEvent(QCloseEvent* event) { return; } - if (!ui.action_Fullscreen->isChecked()) { - UISettings::values.geometry = saveGeometry(); - UISettings::values.renderwindow_geometry = render_window->saveGeometry(); - } - UISettings::values.state = saveState(); -#if MICROPROFILE_ENABLED - UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); - UISettings::values.microprofile_visible = microProfileDialog->isVisible(); -#endif - UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); - UISettings::values.fullscreen = ui.action_Fullscreen->isChecked(); - UISettings::values.display_titlebar = ui.action_Display_Dock_Widget_Headers->isChecked(); - UISettings::values.show_filter_bar = ui.action_Show_Filter_Bar->isChecked(); - UISettings::values.show_status_bar = ui.action_Show_Status_Bar->isChecked(); - UISettings::values.first_start = false; - + UpdateUISettings(); game_list->SaveInterfaceLayout(); hotkey_registry.SaveHotkeys(); @@ -2946,7 +2961,7 @@ void GMainWindow::OnLanguageChanged(const QString& locale) { UpdateWindowTitle(); if (emulation_running) - ui.action_Start->setText(tr("Continue")); + ui.action_Start->setText(tr("&Continue")); } void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 6242341d1..ea6d2c30d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -55,6 +55,10 @@ namespace InputCommon { class InputSubsystem; } +namespace Service::AM::Applets { +enum class WebExitReason : u32; +} + enum class EmulatedDirectoryTarget { NAND, SDMC, @@ -126,8 +130,8 @@ signals: void 
SoftwareKeyboardFinishedText(std::optional<std::u16string> text); void SoftwareKeyboardFinishedCheckDialog(); - void WebBrowserUnpackRomFS(); - void WebBrowserFinishedBrowsing(); + void WebBrowserExtractOfflineRomFS(); + void WebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason, std::string last_url); public slots: void OnLoadComplete(); @@ -138,7 +142,8 @@ public slots: void ProfileSelectorSelectProfile(); void SoftwareKeyboardGetText(const Core::Frontend::SoftwareKeyboardParameters& parameters); void SoftwareKeyboardInvokeCheckDialog(std::u16string error_message); - void WebBrowserOpenPage(std::string_view filename, std::string_view arguments); + void WebBrowserOpenWebPage(std::string_view main_url, std::string_view additional_args, + bool is_local); void OnAppFocusStateChanged(Qt::ApplicationState state); private: @@ -237,7 +242,8 @@ private slots: void ShowFullscreen(); void HideFullscreen(); void ToggleWindowMode(); - void ResetWindowSize(); + void ResetWindowSize720(); + void ResetWindowSize1080(); void OnCaptureScreenshot(); void OnCoreError(Core::System::ResultStatus, std::string); void OnReinitializeKeys(ReinitializeKeyBehavior behavior); @@ -257,6 +263,7 @@ private: const std::string& title_version = {}); void UpdateStatusBar(); void UpdateStatusButtons(); + void UpdateUISettings(); void HideMouseCursor(); void ShowMouseCursor(); void OpenURL(const QUrl& url); @@ -321,6 +328,9 @@ private: // Last game booted, used for multi-process apps QString last_filename_booted; + // Disables the web applet for the rest of the emulated session + bool disable_web_applet{}; + protected: void dropEvent(QDropEvent* event) override; void dragEnterEvent(QDragEnterEvent* event) override; diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 2f3792247..e2ad5baf6 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -25,16 +25,7 @@ </property> <widget class="QWidget" name="centralwidget"> <layout class="QHBoxLayout" name="horizontalLayout"> - <property 
name="leftMargin"> - <number>0</number> - </property> - <property name="topMargin"> - <number>0</number> - </property> - <property name="rightMargin"> - <number>0</number> - </property> - <property name="bottomMargin"> + <property name="margin"> <number>0</number> </property> </layout> @@ -45,7 +36,7 @@ <x>0</x> <y>0</y> <width>1280</width> - <height>21</height> + <height>26</height> </rect> </property> <widget class="QMenu" name="menu_File"> @@ -54,7 +45,7 @@ </property> <widget class="QMenu" name="menu_recent_files"> <property name="title"> - <string>Recent Files</string> + <string>&Recent Files</string> </property> </widget> <addaction name="action_Install_File_NAND"/> @@ -89,7 +80,7 @@ </property> <widget class="QMenu" name="menu_View_Debugging"> <property name="title"> - <string>Debugging</string> + <string>&Debugging</string> </property> </widget> <addaction name="action_Fullscreen"/> @@ -97,13 +88,14 @@ <addaction name="action_Display_Dock_Widget_Headers"/> <addaction name="action_Show_Filter_Bar"/> <addaction name="action_Show_Status_Bar"/> - <addaction name="action_Reset_Window_Size"/> + <addaction name="action_Reset_Window_Size_720"/> + <addaction name="action_Reset_Window_Size_1080"/> <addaction name="separator"/> <addaction name="menu_View_Debugging"/> </widget> <widget class="QMenu" name="menu_Tools"> <property name="title"> - <string>Tools</string> + <string>&Tools</string> </property> <addaction name="action_Rederive"/> <addaction name="separator"/> @@ -131,17 +123,17 @@ <bool>true</bool> </property> <property name="text"> - <string>Install Files to NAND...</string> + <string>&Install Files to NAND...</string> </property> </action> <action name="action_Load_File"> <property name="text"> - <string>Load File...</string> + <string>L&oad File...</string> </property> </action> <action name="action_Load_Folder"> <property name="text"> - <string>Load Folder...</string> + <string>Load &Folder...</string> </property> </action> <action name="action_Exit"> @@ 
-175,12 +167,12 @@ </action> <action name="action_Rederive"> <property name="text"> - <string>Reinitialize keys...</string> + <string>&Reinitialize keys...</string> </property> </action> <action name="action_About"> <property name="text"> - <string>About yuzu</string> + <string>&About yuzu</string> </property> </action> <action name="action_Single_Window_Mode"> @@ -188,12 +180,12 @@ <bool>true</bool> </property> <property name="text"> - <string>Single Window Mode</string> + <string>Single &Window Mode</string> </property> </action> <action name="action_Configure"> <property name="text"> - <string>Configure...</string> + <string>Con&figure...</string> </property> </action> <action name="action_Display_Dock_Widget_Headers"> @@ -201,7 +193,7 @@ <bool>true</bool> </property> <property name="text"> - <string>Display Dock Widget Headers</string> + <string>Display D&ock Widget Headers</string> </property> </action> <action name="action_Show_Filter_Bar"> @@ -209,7 +201,7 @@ <bool>true</bool> </property> <property name="text"> - <string>Show Filter Bar</string> + <string>Show &Filter Bar</string> </property> </action> <action name="action_Show_Status_Bar"> @@ -217,12 +209,26 @@ <bool>true</bool> </property> <property name="text"> + <string>Show &Status Bar</string> + </property> + <property name="iconText"> <string>Show Status Bar</string> </property> </action> - <action name="action_Reset_Window_Size"> + <action name="action_Reset_Window_Size_720"> + <property name="text"> + <string>Reset Window Size to &720p</string> + </property> + <property name="iconText"> + <string>Reset Window Size to 720p</string> + </property> + </action> + <action name="action_Reset_Window_Size_1080"> <property name="text"> - <string>Reset Window Size</string> + <string>Reset Window Size to &1080p</string> + </property> + <property name="iconText"> + <string>Reset Window Size to 1080p</string> </property> </action> <action name="action_Fullscreen"> @@ -230,7 +236,7 @@ <bool>true</bool> </property> 
<property name="text"> - <string>Fullscreen</string> + <string>F&ullscreen</string> </property> </action> <action name="action_Restart"> @@ -238,7 +244,7 @@ <bool>false</bool> </property> <property name="text"> - <string>Restart</string> + <string>&Restart</string> </property> </action> <action name="action_Load_Amiibo"> @@ -246,7 +252,7 @@ <bool>false</bool> </property> <property name="text"> - <string>Load Amiibo...</string> + <string>Load &Amiibo...</string> </property> </action> <action name="action_Report_Compatibility"> @@ -254,7 +260,7 @@ <bool>false</bool> </property> <property name="text"> - <string>Report Compatibility</string> + <string>&Report Compatibility</string> </property> <property name="visible"> <bool>false</bool> @@ -262,22 +268,22 @@ </action> <action name="action_Open_Mods_Page"> <property name="text"> - <string>Open Mods Page</string> + <string>Open &Mods Page</string> </property> </action> <action name="action_Open_Quickstart_Guide"> <property name="text"> - <string>Open Quickstart Guide</string> + <string>Open &Quickstart Guide</string> </property> </action> <action name="action_Open_FAQ"> <property name="text"> - <string>FAQ</string> + <string>&FAQ</string> </property> </action> <action name="action_Open_yuzu_Folder"> <property name="text"> - <string>Open yuzu Folder</string> + <string>Open &yuzu Folder</string> </property> </action> <action name="action_Capture_Screenshot"> @@ -285,7 +291,7 @@ <bool>false</bool> </property> <property name="text"> - <string>Capture Screenshot</string> + <string>&Capture Screenshot</string> </property> </action> <action name="action_Configure_Current_Game"> @@ -293,7 +299,7 @@ <bool>false</bool> </property> <property name="text"> - <string>Configure Current Game...</string> + <string>Configure C&urrent Game...</string> </property> </action> </widget> diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp new file mode 100644 index 000000000..2d491d8c0 --- 
/dev/null +++ b/src/yuzu/util/url_request_interceptor.cpp @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#ifdef YUZU_USE_QT_WEB_ENGINE + +#include "yuzu/util/url_request_interceptor.h" + +UrlRequestInterceptor::UrlRequestInterceptor(QObject* p) : QWebEngineUrlRequestInterceptor(p) {} + +UrlRequestInterceptor::~UrlRequestInterceptor() = default; + +void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) { + const auto resource_type = info.resourceType(); + + switch (resource_type) { + case QWebEngineUrlRequestInfo::ResourceTypeMainFrame: + requested_url = info.requestUrl(); + emit FrameChanged(); + break; + case QWebEngineUrlRequestInfo::ResourceTypeSubFrame: + case QWebEngineUrlRequestInfo::ResourceTypeXhr: + emit FrameChanged(); + break; + } +} + +QUrl UrlRequestInterceptor::GetRequestedURL() const { + return requested_url; +} + +#endif diff --git a/src/yuzu/util/url_request_interceptor.h b/src/yuzu/util/url_request_interceptor.h new file mode 100644 index 000000000..8a7f7499f --- /dev/null +++ b/src/yuzu/util/url_request_interceptor.h @@ -0,0 +1,30 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#ifdef YUZU_USE_QT_WEB_ENGINE + +#include <QObject> +#include <QWebEngineUrlRequestInterceptor> + +class UrlRequestInterceptor : public QWebEngineUrlRequestInterceptor { + Q_OBJECT + +public: + explicit UrlRequestInterceptor(QObject* p = nullptr); + ~UrlRequestInterceptor() override; + + void interceptRequest(QWebEngineUrlRequestInfo& info) override; + + QUrl GetRequestedURL() const; + +signals: + void FrameChanged(); + +private: + QUrl requested_url; +}; + +#endif diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt index 57f9916f6..0b3f2cb54 100644 --- a/src/yuzu_cmd/CMakeLists.txt +++ b/src/yuzu_cmd/CMakeLists.txt @@ -4,26 +4,17 @@ add_executable(yuzu-cmd config.cpp config.h default_ini.h - emu_window/emu_window_sdl2_gl.cpp - emu_window/emu_window_sdl2_gl.h emu_window/emu_window_sdl2.cpp emu_window/emu_window_sdl2.h emu_window/emu_window_sdl2_gl.cpp emu_window/emu_window_sdl2_gl.h + emu_window/emu_window_sdl2_vk.cpp + emu_window/emu_window_sdl2_vk.h resource.h yuzu.cpp yuzu.rc ) -if (ENABLE_VULKAN) - target_sources(yuzu-cmd PRIVATE - emu_window/emu_window_sdl2_vk.cpp - emu_window/emu_window_sdl2_vk.h) - - target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) - target_compile_definitions(yuzu-cmd PRIVATE HAS_VULKAN) -endif() - create_target_directory_groups(yuzu-cmd) target_link_libraries(yuzu-cmd PRIVATE common core input_common) @@ -33,6 +24,8 @@ if (MSVC) endif() target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads) +target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) + if(UNIX AND NOT APPLE) install(TARGETS yuzu-cmd RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") endif() diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 34c9673bc..38075c345 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -345,7 +345,6 @@ void Config::ReadValues() { // System Settings::values.use_docked_mode.SetValue( 
sdl2_config->GetBoolean("System", "use_docked_mode", false)); - const auto size = sdl2_config->GetInteger("System", "users_size", 0); Settings::values.current_user = std::clamp<int>( sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); @@ -430,9 +429,6 @@ void Config::ReadValues() { // Debugging Settings::values.record_frame_times = sdl2_config->GetBoolean("Debugging", "record_frame_times", false); - Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); - Settings::values.gdbstub_port = - static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689)); Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", ""); Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false); Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index bcbbcd4ca..2d4b98d9a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -318,9 +318,6 @@ log_filter = *:Trace [Debugging] # Record frame time data, can be found in the log directory. Boolean value record_frame_times = -# Port for listening to GDB connections. 
-use_gdbstub=false -gdbstub_port=24689 # Determines whether or not yuzu will dump the ExeFS of all games it attempts to load while loading them dump_exefs=false # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index c2efe1ee6..2497c71ae 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -35,9 +35,7 @@ #include "yuzu_cmd/config.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" -#ifdef HAS_VULKAN #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" -#endif #ifdef _WIN32 // windows.h needs to be included before shellapi.h @@ -64,7 +62,6 @@ __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; static void PrintHelp(const char* argv0) { std::cout << "Usage: " << argv0 << " [options] <filename>\n" - "-g, --gdbport=NUMBER Enable gdb stub on port NUMBER\n" "-f, --fullscreen Start in fullscreen mode\n" "-h, --help Display this help and exit\n" "-v, --version Output version information and exit\n" @@ -96,8 +93,6 @@ int main(int argc, char** argv) { Config config; int option_index = 0; - bool use_gdbstub = Settings::values.use_gdbstub; - u32 gdb_port = static_cast<u32>(Settings::values.gdbstub_port); InitializeLogging(); @@ -116,26 +111,17 @@ int main(int argc, char** argv) { bool fullscreen = false; static struct option long_options[] = { - {"gdbport", required_argument, 0, 'g'}, {"fullscreen", no_argument, 0, 'f'}, - {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, - {"program", optional_argument, 0, 'p'}, {0, 0, 0, 0}, + {"fullscreen", no_argument, 0, 'f'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'v'}, + {"program", optional_argument, 0, 'p'}, + {0, 0, 0, 0}, }; while (optind < argc) { int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index); if (arg != -1) { switch (static_cast<char>(arg)) { - case 'g': - errno = 0; - gdb_port = 
strtoul(optarg, &endarg, 0); - use_gdbstub = true; - if (endarg == optarg) - errno = EINVAL; - if (errno != 0) { - perror("--gdbport"); - exit(1); - } - break; case 'f': fullscreen = true; LOG_INFO(Frontend, "Starting in fullscreen mode..."); @@ -177,8 +163,6 @@ int main(int argc, char** argv) { InputCommon::InputSubsystem input_subsystem; // Apply the command line arguments - Settings::values.gdbstub_port = gdb_port; - Settings::values.use_gdbstub = use_gdbstub; Settings::Apply(system); std::unique_ptr<EmuWindow_SDL2> emu_window; @@ -187,13 +171,8 @@ int main(int argc, char** argv) { emu_window = std::make_unique<EmuWindow_SDL2_GL>(&input_subsystem, fullscreen); break; case Settings::RendererBackend::Vulkan: -#ifdef HAS_VULKAN emu_window = std::make_unique<EmuWindow_SDL2_VK>(&input_subsystem); break; -#else - LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); - return 1; -#endif } system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>()); diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index b6cdc7c1c..91684e96e 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp @@ -158,7 +158,6 @@ void Config::ReadValues() { Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false); // Debugging - Settings::values.use_gdbstub = false; Settings::values.program_args = ""; Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false); Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 50bd7ae41..6435ffabb 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp @@ -162,7 +162,6 @@ int main(int argc, char** argv) { Core::System& system{Core::System::GetInstance()}; - Settings::values.use_gdbstub = false; Settings::Apply(system); const auto emu_window{std::make_unique<EmuWindow_SDL2_Hide>()}; |