diff options
Diffstat (limited to 'src')
300 files changed, 6992 insertions, 1315 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index cb5c0f326..23d43a394 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -55,6 +55,7 @@ add_library(common STATIC dynamic_library.h error.cpp error.h + expected.h fiber.cpp fiber.h fs/file.cpp diff --git a/src/common/expected.h b/src/common/expected.h new file mode 100644 index 000000000..c8d8579c1 --- /dev/null +++ b/src/common/expected.h @@ -0,0 +1,987 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This is based on the proposed implementation of std::expected (P0323) +// https://github.com/TartanLlama/expected/blob/master/include/tl/expected.hpp + +#pragma once + +#include <type_traits> +#include <utility> + +namespace Common { + +template <typename T, typename E> +class Expected; + +template <typename E> +class Unexpected { +public: + Unexpected() = delete; + + constexpr explicit Unexpected(const E& e) : m_val{e} {} + + constexpr explicit Unexpected(E&& e) : m_val{std::move(e)} {} + + constexpr E& value() & { + return m_val; + } + + constexpr const E& value() const& { + return m_val; + } + + constexpr E&& value() && { + return std::move(m_val); + } + + constexpr const E&& value() const&& { + return std::move(m_val); + } + +private: + E m_val; +}; + +template <typename E> +constexpr auto operator<=>(const Unexpected<E>& lhs, const Unexpected<E>& rhs) { + return lhs.value() <=> rhs.value(); +} + +struct unexpect_t { + constexpr explicit unexpect_t() = default; +}; + +namespace detail { + +struct no_init_t { + constexpr explicit no_init_t() = default; +}; + +/** + * This specialization is for when T is not trivially destructible, + * so the destructor must be called on destruction of `expected' + * Additionally, this requires E to be trivially destructible + */ +template <typename T, typename E, bool = std::is_trivially_destructible_v<T>> +requires std::is_trivially_destructible_v<E> +struct expected_storage_base { + constexpr expected_storage_base() : m_val{T{}}, m_has_val{true} {} + + constexpr expected_storage_base(no_init_t) : m_has_val{false} {} + + template <typename... Args, std::enable_if_t<std::is_constructible_v<T, Args&&...>>* = nullptr> + constexpr expected_storage_base(std::in_place_t, Args&&... args) + : m_val{std::forward<Args>(args)...}, m_has_val{true} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<T, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr expected_storage_base(std::in_place_t, std::initializer_list<U> il, Args&&... args) + : m_val{il, std::forward<Args>(args)...}, m_has_val{true} {} + + template <typename... Args, std::enable_if_t<std::is_constructible_v<E, Args&&...>>* = nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args&&... args) + : m_unexpect{std::forward<Args>(args)...}, m_has_val{false} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<E, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, std::initializer_list<U> il, + Args&&... args) + : m_unexpect{il, std::forward<Args>(args)...}, m_has_val{false} {} + + ~expected_storage_base() { + if (m_has_val) { + m_val.~T(); + } + } + + union { + T m_val; + Unexpected<E> m_unexpect; + }; + + bool m_has_val; +}; + +/** + * This specialization is for when T is trivially destructible, + * so the destructor of `expected` can be trivial + * Additionally, this requires E to be trivially destructible + */ +template <typename T, typename E> +requires std::is_trivially_destructible_v<E> +struct expected_storage_base<T, E, true> { + constexpr expected_storage_base() : m_val{T{}}, m_has_val{true} {} + + constexpr expected_storage_base(no_init_t) : m_has_val{false} {} + + template <typename... Args, std::enable_if_t<std::is_constructible_v<T, Args&&...>>* = nullptr> + constexpr expected_storage_base(std::in_place_t, Args&&... args) + : m_val{std::forward<Args>(args)...}, m_has_val{true} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<T, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr expected_storage_base(std::in_place_t, std::initializer_list<U> il, Args&&... args) + : m_val{il, std::forward<Args>(args)...}, m_has_val{true} {} + + template <typename... Args, std::enable_if_t<std::is_constructible_v<E, Args&&...>>* = nullptr> + constexpr explicit expected_storage_base(unexpect_t, Args&&... args) + : m_unexpect{std::forward<Args>(args)...}, m_has_val{false} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<E, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr explicit expected_storage_base(unexpect_t, std::initializer_list<U> il, + Args&&... args) + : m_unexpect{il, std::forward<Args>(args)...}, m_has_val{false} {} + + ~expected_storage_base() = default; + + union { + T m_val; + Unexpected<E> m_unexpect; + }; + + bool m_has_val; +}; + +template <typename T, typename E> +struct expected_operations_base : expected_storage_base<T, E> { + using expected_storage_base<T, E>::expected_storage_base; + + template <typename... Args> + void construct(Args&&... args) noexcept { + new (std::addressof(this->m_val)) T{std::forward<Args>(args)...}; + this->m_has_val = true; + } + + template <typename Rhs> + void construct_with(Rhs&& rhs) noexcept { + new (std::addressof(this->m_val)) T{std::forward<Rhs>(rhs).get()}; + this->m_has_val = true; + } + + template <typename... Args> + void construct_error(Args&&... args) noexcept { + new (std::addressof(this->m_unexpect)) Unexpected<E>{std::forward<Args>(args)...}; + this->m_has_val = false; + } + + void assign(const expected_operations_base& rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~Unexpected<E>(); + construct(rhs.get()); + } else { + assign_common(rhs); + } + } + + void assign(expected_operations_base&& rhs) noexcept { + if (!this->m_has_val && rhs.m_has_val) { + geterr().~Unexpected<E>(); + construct(std::move(rhs).get()); + } else { + assign_common(rhs); + } + } + + template <typename Rhs> + void assign_common(Rhs&& rhs) { + if (this->m_has_val) { + if (rhs.m_has_val) { + get() = std::forward<Rhs>(rhs).get(); + } else { + destroy_val(); + construct_error(std::forward<Rhs>(rhs).geterr()); + } + } else { + if (!rhs.m_has_val) { + geterr() = std::forward<Rhs>(rhs).geterr(); + } + } + } + + bool has_value() const { + return this->m_has_val; + } + + constexpr T& get() & { + return this->m_val; + } + + constexpr const T& get() const& { + return this->m_val; + } + + constexpr T&& get() && { + return std::move(this->m_val); + } + + constexpr const T&& get() const&& { + return std::move(this->m_val); + } + + constexpr Unexpected<E>& geterr() & { + return this->m_unexpect; + } + + constexpr const Unexpected<E>& geterr() const& { + return this->m_unexpect; + } + + constexpr Unexpected<E>&& geterr() && { + return std::move(this->m_unexpect); + } + + constexpr const Unexpected<E>&& geterr() const&& { + return std::move(this->m_unexpect); + } + + constexpr void destroy_val() { + get().~T(); + } +}; + +/** + * This manages conditionally having a trivial copy constructor + * This specialization is for when T is trivially copy constructible + * Additionally, this requires E to be trivially copy constructible + */ +template <typename T, typename E, bool = std::is_trivially_copy_constructible_v<T>> +requires std::is_trivially_copy_constructible_v<E> +struct expected_copy_base : expected_operations_base<T, E> { + using expected_operations_base<T, E>::expected_operations_base; +}; + +/** + * This specialization is for when T is not trivially copy constructible + * Additionally, this requires E to be trivially copy constructible + */ +template <typename T, typename E> +requires std::is_trivially_copy_constructible_v<E> +struct expected_copy_base<T, E, false> : expected_operations_base<T, E> { + using expected_operations_base<T, E>::expected_operations_base; + + expected_copy_base() = default; + + expected_copy_base(const expected_copy_base& rhs) + : expected_operations_base<T, E>{no_init_t{}} { + if (rhs.has_value()) { + this->construct_with(rhs); + } else { + this->construct_error(rhs.geterr()); + } + } + + expected_copy_base(expected_copy_base&&) = default; + + expected_copy_base& operator=(const expected_copy_base&) = default; + + expected_copy_base& operator=(expected_copy_base&&) = default; +}; + +/** + * This manages conditionally having a trivial move constructor + * This specialization is for when T is trivially move constructible + * Additionally, this requires E to be trivially move constructible + */ +template <typename T, typename E, bool = std::is_trivially_move_constructible_v<T>> +requires std::is_trivially_move_constructible_v<E> +struct expected_move_base : expected_copy_base<T, E> { + using expected_copy_base<T, E>::expected_copy_base; +}; + +/** + * This specialization is for when T is not trivially move constructible + * Additionally, this requires E to be trivially move constructible + */ +template <typename T, typename E> +requires std::is_trivially_move_constructible_v<E> +struct expected_move_base<T, E, false> : expected_copy_base<T, E> { + using expected_copy_base<T, E>::expected_copy_base; + + expected_move_base() = default; + + expected_move_base(const expected_move_base&) = default; + + expected_move_base(expected_move_base&& rhs) noexcept(std::is_nothrow_move_constructible_v<T>) + : expected_copy_base<T, E>{no_init_t{}} { + if (rhs.has_value()) { + this->construct_with(std::move(rhs)); + } else { + this->construct_error(std::move(rhs.geterr())); + } + } + + expected_move_base& operator=(const expected_move_base&) = default; + + expected_move_base& operator=(expected_move_base&&) = default; +}; + +/** + * This manages conditionally having a trivial copy assignment operator + * This specialization is for when T is trivially copy assignable + * Additionally, this requires E to be trivially copy assignable + */ +template <typename T, typename E, + bool = std::conjunction_v<std::is_trivially_copy_assignable<T>, + std::is_trivially_copy_constructible<T>, + std::is_trivially_destructible<T>>> +requires std::conjunction_v<std::is_trivially_copy_assignable<E>, + std::is_trivially_copy_constructible<E>, + std::is_trivially_destructible<E>> +struct expected_copy_assign_base : expected_move_base<T, E> { + using expected_move_base<T, E>::expected_move_base; +}; + +/** + * This specialization is for when T is not trivially copy assignable + * Additionally, this requires E to be trivially copy assignable + */ +template <typename T, typename E> +requires std::conjunction_v<std::is_trivially_copy_assignable<E>, + std::is_trivially_copy_constructible<E>, + std::is_trivially_destructible<E>> +struct expected_copy_assign_base<T, E, false> : expected_move_base<T, E> { + using expected_move_base<T, E>::expected_move_base; + + expected_copy_assign_base() = default; + + expected_copy_assign_base(const expected_copy_assign_base&) = default; + + expected_copy_assign_base(expected_copy_assign_base&&) = default; + + expected_copy_assign_base& operator=(const expected_copy_assign_base& rhs) { + this->assign(rhs); + return *this; + } + + expected_copy_assign_base& operator=(expected_copy_assign_base&&) = default; +}; + +/** + * This manages conditionally having a trivial move assignment operator + * This specialization is for when T is trivially move assignable + * Additionally, this requires E to be trivially move assignable + */ +template <typename T, typename E, + bool = std::conjunction_v<std::is_trivially_move_assignable<T>, + std::is_trivially_move_constructible<T>, + std::is_trivially_destructible<T>>> +requires std::conjunction_v<std::is_trivially_move_assignable<E>, + std::is_trivially_move_constructible<E>, + std::is_trivially_destructible<E>> +struct expected_move_assign_base : expected_copy_assign_base<T, E> { + using expected_copy_assign_base<T, E>::expected_copy_assign_base; +}; + +/** + * This specialization is for when T is not trivially move assignable + * Additionally, this requires E to be trivially move assignable + */ +template <typename T, typename E> +requires std::conjunction_v<std::is_trivially_move_assignable<E>, + std::is_trivially_move_constructible<E>, + std::is_trivially_destructible<E>> +struct expected_move_assign_base<T, E, false> : expected_copy_assign_base<T, E> { + using expected_copy_assign_base<T, E>::expected_copy_assign_base; + + expected_move_assign_base() = default; + + expected_move_assign_base(const expected_move_assign_base&) = default; + + expected_move_assign_base(expected_move_assign_base&&) = default; + + expected_move_assign_base& operator=(const expected_move_assign_base&) = default; + + expected_move_assign_base& operator=(expected_move_assign_base&& rhs) noexcept( + std::conjunction_v<std::is_nothrow_move_constructible<T>, + std::is_nothrow_move_assignable<T>>) { + this->assign(std::move(rhs)); + return *this; + } +}; + +/** + * expected_delete_ctor_base will conditionally delete copy and move constructors + * depending on whether T is copy/move constructible + * Additionally, this requires E to be copy/move constructible + */ +template <typename T, typename E, bool EnableCopy = std::is_copy_constructible_v<T>, + bool EnableMove = std::is_move_constructible_v<T>> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>> +struct expected_delete_ctor_base { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base(expected_delete_ctor_base&&) noexcept = default; + expected_delete_ctor_base& operator=(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base& operator=(expected_delete_ctor_base&&) noexcept = default; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>> +struct expected_delete_ctor_base<T, E, true, false> { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base(expected_delete_ctor_base&&) noexcept = delete; + expected_delete_ctor_base& operator=(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base& operator=(expected_delete_ctor_base&&) noexcept = default; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>> +struct expected_delete_ctor_base<T, E, false, true> { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base&) = delete; + expected_delete_ctor_base(expected_delete_ctor_base&&) noexcept = default; + expected_delete_ctor_base& operator=(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base& operator=(expected_delete_ctor_base&&) noexcept = default; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>> +struct expected_delete_ctor_base<T, E, false, false> { + expected_delete_ctor_base() = default; + expected_delete_ctor_base(const expected_delete_ctor_base&) = delete; + expected_delete_ctor_base(expected_delete_ctor_base&&) noexcept = delete; + expected_delete_ctor_base& operator=(const expected_delete_ctor_base&) = default; + expected_delete_ctor_base& operator=(expected_delete_ctor_base&&) noexcept = default; +}; + +/** + * expected_delete_assign_base will conditionally delete copy and move assignment operators + * depending on whether T is copy/move constructible + assignable + * Additionally, this requires E to be copy/move constructible + assignable + */ +template < + typename T, typename E, + bool EnableCopy = std::conjunction_v<std::is_copy_constructible<T>, std::is_copy_assignable<T>>, + bool EnableMove = std::conjunction_v<std::is_move_constructible<T>, std::is_move_assignable<T>>> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>, + std::is_copy_assignable<E>, std::is_move_assignable<E>> +struct expected_delete_assign_base { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base&) = default; + expected_delete_assign_base(expected_delete_assign_base&&) noexcept = default; + expected_delete_assign_base& operator=(const expected_delete_assign_base&) = default; + expected_delete_assign_base& operator=(expected_delete_assign_base&&) noexcept = default; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>, + std::is_copy_assignable<E>, std::is_move_assignable<E>> +struct expected_delete_assign_base<T, E, true, false> { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base&) = default; + expected_delete_assign_base(expected_delete_assign_base&&) noexcept = default; + expected_delete_assign_base& operator=(const expected_delete_assign_base&) = default; + expected_delete_assign_base& operator=(expected_delete_assign_base&&) noexcept = delete; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>, + std::is_copy_assignable<E>, std::is_move_assignable<E>> +struct expected_delete_assign_base<T, E, false, true> { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base&) = default; + expected_delete_assign_base(expected_delete_assign_base&&) noexcept = default; + expected_delete_assign_base& operator=(const expected_delete_assign_base&) = delete; + expected_delete_assign_base& operator=(expected_delete_assign_base&&) noexcept = default; +}; + +template <typename T, typename E> +requires std::conjunction_v<std::is_copy_constructible<E>, std::is_move_constructible<E>, + std::is_copy_assignable<E>, std::is_move_assignable<E>> +struct expected_delete_assign_base<T, E, false, false> { + expected_delete_assign_base() = default; + expected_delete_assign_base(const expected_delete_assign_base&) = default; + expected_delete_assign_base(expected_delete_assign_base&&) noexcept = default; + expected_delete_assign_base& operator=(const expected_delete_assign_base&) = delete; + expected_delete_assign_base& operator=(expected_delete_assign_base&&) noexcept = delete; +}; + +/** + * This is needed to be able to construct the expected_default_ctor_base which follows, + * while still conditionally deleting the default constructor. + */ +struct default_constructor_tag { + constexpr explicit default_constructor_tag() = default; +}; + +/** + * expected_default_ctor_base will ensure that expected + * has a deleted default constructor if T is not default constructible + * This specialization is for when T is default constructible + */ +template <typename T, typename E, bool Enable = std::is_default_constructible_v<T>> +struct expected_default_ctor_base { + constexpr expected_default_ctor_base() noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base const&) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base&&) noexcept = default; + expected_default_ctor_base& operator=(expected_default_ctor_base const&) noexcept = default; + expected_default_ctor_base& operator=(expected_default_ctor_base&&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; + +template <typename T, typename E> +struct expected_default_ctor_base<T, E, false> { + constexpr expected_default_ctor_base() noexcept = delete; + constexpr expected_default_ctor_base(expected_default_ctor_base const&) noexcept = default; + constexpr expected_default_ctor_base(expected_default_ctor_base&&) noexcept = default; + expected_default_ctor_base& operator=(expected_default_ctor_base const&) noexcept = default; + expected_default_ctor_base& operator=(expected_default_ctor_base&&) noexcept = default; + + constexpr explicit expected_default_ctor_base(default_constructor_tag) {} +}; + +template <typename T, typename E, typename U> +using expected_enable_forward_value = + std::enable_if_t<std::is_constructible_v<T, U&&> && + !std::is_same_v<std::remove_cvref_t<U>, std::in_place_t> && + !std::is_same_v<Expected<T, E>, std::remove_cvref_t<U>> && + !std::is_same_v<Unexpected<E>, std::remove_cvref_t<U>>>; + +template <typename T, typename E, typename U, typename G, typename UR, typename GR> +using expected_enable_from_other = std::enable_if_t< + std::is_constructible_v<T, UR> && std::is_constructible_v<E, GR> && + !std::is_constructible_v<T, Expected<U, G>&> && !std::is_constructible_v<T, Expected<U, G>&&> && + !std::is_constructible_v<T, const Expected<U, G>&> && + !std::is_constructible_v<T, const Expected<U, G>&&> && + !std::is_convertible_v<Expected<U, G>&, T> && !std::is_convertible_v<Expected<U, G>&&, T> && + !std::is_convertible_v<const Expected<U, G>&, T> && + !std::is_convertible_v<const Expected<U, G>&&, T>>; + +} // namespace detail + +template <typename T, typename E> +class Expected : private detail::expected_move_assign_base<T, E>, + private detail::expected_delete_ctor_base<T, E>, + private detail::expected_delete_assign_base<T, E>, + private detail::expected_default_ctor_base<T, E> { +public: + using value_type = T; + using error_type = E; + using unexpected_type = Unexpected<E>; + + constexpr Expected() = default; + constexpr Expected(const Expected&) = default; + constexpr Expected(Expected&&) = default; + Expected& operator=(const Expected&) = default; + Expected& operator=(Expected&&) = default; + + template <typename... Args, std::enable_if_t<std::is_constructible_v<T, Args&&...>>* = nullptr> + constexpr Expected(std::in_place_t, Args&&... args) + : impl_base{std::in_place, std::forward<Args>(args)...}, + ctor_base{detail::default_constructor_tag{}} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<T, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr Expected(std::in_place_t, std::initializer_list<U> il, Args&&... args) + : impl_base{std::in_place, il, std::forward<Args>(args)...}, + ctor_base{detail::default_constructor_tag{}} {} + + template <typename G = E, std::enable_if_t<std::is_constructible_v<E, const G&>>* = nullptr, + std::enable_if_t<!std::is_convertible_v<const G&, E>>* = nullptr> + constexpr explicit Expected(const Unexpected<G>& e) + : impl_base{unexpect_t{}, e.value()}, ctor_base{detail::default_constructor_tag{}} {} + + template <typename G = E, std::enable_if_t<std::is_constructible_v<E, const G&>>* = nullptr, + std::enable_if_t<std::is_convertible_v<const G&, E>>* = nullptr> + constexpr Expected(Unexpected<G> const& e) + : impl_base{unexpect_t{}, e.value()}, ctor_base{detail::default_constructor_tag{}} {} + + template <typename G = E, std::enable_if_t<std::is_constructible_v<E, G&&>>* = nullptr, + std::enable_if_t<!std::is_convertible_v<G&&, E>>* = nullptr> + constexpr explicit Expected(Unexpected<G>&& e) noexcept(std::is_nothrow_constructible_v<E, G&&>) + : impl_base{unexpect_t{}, std::move(e.value())}, ctor_base{ + detail::default_constructor_tag{}} {} + + template <typename G = E, std::enable_if_t<std::is_constructible_v<E, G&&>>* = nullptr, + std::enable_if_t<std::is_convertible_v<G&&, E>>* = nullptr> + constexpr Expected(Unexpected<G>&& e) noexcept(std::is_nothrow_constructible_v<E, G&&>) + : impl_base{unexpect_t{}, std::move(e.value())}, ctor_base{ + detail::default_constructor_tag{}} {} + + template <typename... Args, std::enable_if_t<std::is_constructible_v<E, Args&&...>>* = nullptr> + constexpr explicit Expected(unexpect_t, Args&&... args) + : impl_base{unexpect_t{}, std::forward<Args>(args)...}, + ctor_base{detail::default_constructor_tag{}} {} + + template <typename U, typename... Args, + std::enable_if_t<std::is_constructible_v<E, std::initializer_list<U>&, Args&&...>>* = + nullptr> + constexpr explicit Expected(unexpect_t, std::initializer_list<U> il, Args&&... args) + : impl_base{unexpect_t{}, il, std::forward<Args>(args)...}, + ctor_base{detail::default_constructor_tag{}} {} + + template <typename U, typename G, + std::enable_if_t<!(std::is_convertible_v<U const&, T> && + std::is_convertible_v<G const&, E>)>* = nullptr, + detail::expected_enable_from_other<T, E, U, G, const U&, const G&>* = nullptr> + constexpr explicit Expected(const Expected<U, G>& rhs) + : ctor_base{detail::default_constructor_tag{}} { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template <typename U, typename G, + std::enable_if_t<(std::is_convertible_v<U const&, T> && + std::is_convertible_v<G const&, E>)>* = nullptr, + detail::expected_enable_from_other<T, E, U, G, const U&, const G&>* = nullptr> + constexpr Expected(const Expected<U, G>& rhs) : ctor_base{detail::default_constructor_tag{}} { + if (rhs.has_value()) { + this->construct(*rhs); + } else { + this->construct_error(rhs.error()); + } + } + + template <typename U, typename G, + std::enable_if_t<!(std::is_convertible_v<U&&, T> && std::is_convertible_v<G&&, E>)>* = + nullptr, + detail::expected_enable_from_other<T, E, U, G, U&&, G&&>* = nullptr> + constexpr explicit Expected(Expected<U, G>&& rhs) + : ctor_base{detail::default_constructor_tag{}} { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template <typename U, typename G, + std::enable_if_t<(std::is_convertible_v<U&&, T> && std::is_convertible_v<G&&, E>)>* = + nullptr, + detail::expected_enable_from_other<T, E, U, G, U&&, G&&>* = nullptr> + constexpr Expected(Expected<U, G>&& rhs) : ctor_base{detail::default_constructor_tag{}} { + if (rhs.has_value()) { + this->construct(std::move(*rhs)); + } else { + this->construct_error(std::move(rhs.error())); + } + } + + template <typename U = T, std::enable_if_t<!std::is_convertible_v<U&&, T>>* = nullptr, + detail::expected_enable_forward_value<T, E, U>* = nullptr> + constexpr explicit Expected(U&& v) : Expected{std::in_place, std::forward<U>(v)} {} + + template <typename U = T, std::enable_if_t<std::is_convertible_v<U&&, T>>* = nullptr, + detail::expected_enable_forward_value<T, E, U>* = nullptr> + constexpr Expected(U&& v) : Expected{std::in_place, std::forward<U>(v)} {} + + template <typename U = T, typename G = T, + std::enable_if_t<std::is_nothrow_constructible_v<T, U&&>>* = nullptr, + std::enable_if_t<( + !std::is_same_v<Expected<T, E>, std::remove_cvref_t<U>> && + !std::conjunction_v<std::is_scalar<T>, std::is_same<T, std::remove_cvref_t<U>>> && + std::is_constructible_v<T, U> && std::is_assignable_v<G&, U> && + std::is_nothrow_move_constructible_v<E>)>* = nullptr> + Expected& operator=(U&& v) { + if (has_value()) { + val() = std::forward<U>(v); + } else { + err().~Unexpected<E>(); + new (valptr()) T{std::forward<U>(v)}; + this->m_has_val = true; + } + + return *this; + } + + template <typename U = T, typename G = T, + std::enable_if_t<!std::is_nothrow_constructible_v<T, U&&>>* = nullptr, + std::enable_if_t<( + !std::is_same_v<Expected<T, E>, std::remove_cvref_t<U>> && + !std::conjunction_v<std::is_scalar<T>, std::is_same<T, std::remove_cvref_t<U>>> && + std::is_constructible_v<T, U> && std::is_assignable_v<G&, U> && + std::is_nothrow_move_constructible_v<E>)>* = nullptr> + Expected& operator=(U&& v) { + if (has_value()) { + val() = std::forward<U>(v); + } else { + auto tmp = std::move(err()); + err().~Unexpected<E>(); + new (valptr()) T{std::forward<U>(v)}; + this->m_has_val = true; + } + + return *this; + } + + template <typename G = E, std::enable_if_t<std::is_nothrow_copy_constructible_v<G> && + std::is_assignable_v<G&, G>>* = nullptr> + Expected& operator=(const Unexpected<G>& rhs) { + if (!has_value()) { + err() = rhs; + } else { + this->destroy_val(); + new (errptr()) Unexpected<E>{rhs}; + this->m_has_val = false; + } + + return *this; + } + + template <typename G = E, std::enable_if_t<std::is_nothrow_move_constructible_v<G> && + std::is_move_assignable_v<G>>* = nullptr> + Expected& operator=(Unexpected<G>&& rhs) noexcept { + if (!has_value()) { + err() = std::move(rhs); + } else { + this->destroy_val(); + new (errptr()) Unexpected<E>{std::move(rhs)}; + this->m_has_val = false; + } + + return *this; + } + + template <typename... Args, + std::enable_if_t<std::is_nothrow_constructible_v<T, Args&&...>>* = nullptr> + void emplace(Args&&... args) { + if (has_value()) { + val() = T{std::forward<Args>(args)...}; + } else { + err().~Unexpected<E>(); + new (valptr()) T{std::forward<Args>(args)...}; + this->m_has_val = true; + } + } + + template <typename... Args, + std::enable_if_t<!std::is_nothrow_constructible_v<T, Args&&...>>* = nullptr> + void emplace(Args&&... args) { + if (has_value()) { + val() = T{std::forward<Args>(args)...}; + } else { + auto tmp = std::move(err()); + err().~Unexpected<E>(); + new (valptr()) T{std::forward<Args>(args)...}; + this->m_has_val = true; + } + } + + template <typename U, typename... Args, + std::enable_if_t<std::is_nothrow_constructible_v<T, std::initializer_list<U>&, + Args&&...>>* = nullptr> + void emplace(std::initializer_list<U> il, Args&&... args) { + if (has_value()) { + T t{il, std::forward<Args>(args)...}; + val() = std::move(t); + } else { + err().~Unexpected<E>(); + new (valptr()) T{il, std::forward<Args>(args)...}; + this->m_has_val = true; + } + } + + template <typename U, typename... Args, + std::enable_if_t<!std::is_nothrow_constructible_v<T, std::initializer_list<U>&, + Args&&...>>* = nullptr> + void emplace(std::initializer_list<U> il, Args&&... args) { + if (has_value()) { + T t{il, std::forward<Args>(args)...}; + val() = std::move(t); + } else { + auto tmp = std::move(err()); + err().~Unexpected<E>(); + new (valptr()) T{il, std::forward<Args>(args)...}; + this->m_has_val = true; + } + } + + constexpr T* operator->() { + return valptr(); + } + + constexpr const T* operator->() const { + return valptr(); + } + + template <typename U = T> + constexpr U& operator*() & { + return val(); + } + + template <typename U = T> + constexpr const U& operator*() const& { + return val(); + } + + template <typename U = T> + constexpr U&& operator*() && { + return std::move(val()); + } + + template <typename U = T> + constexpr const U&& operator*() const&& { + return std::move(val()); + } + + constexpr bool has_value() const noexcept { + return this->m_has_val; + } + + constexpr explicit operator bool() const noexcept { + return this->m_has_val; + } + + template <typename U = T> + constexpr U& value() & { + return val(); + } + + template <typename U = T> + constexpr const U& value() const& { + return val(); + } + + template <typename U = T> + constexpr U&& value() && { + return std::move(val()); + } + + template <typename U = T> + constexpr const U&& value() const&& { + return std::move(val()); + } + + constexpr E& error() & { + return err().value(); + } + + constexpr const E& error() const& { + return err().value(); + } + + constexpr E&& error() && { + return std::move(err().value()); + } + + constexpr const E&& error() const&& { + return std::move(err().value()); + } + + template <typename U> + constexpr T value_or(U&& v) const& { + static_assert(std::is_copy_constructible_v<T> && std::is_convertible_v<U&&, T>, + "T must be copy-constructible and convertible from U&&"); + return bool(*this) ? **this : static_cast<T>(std::forward<U>(v)); + } + + template <typename U> + constexpr T value_or(U&& v) && { + static_assert(std::is_move_constructible_v<T> && std::is_convertible_v<U&&, T>, + "T must be move-constructible and convertible from U&&"); + return bool(*this) ? std::move(**this) : static_cast<T>(std::forward<U>(v)); + } + +private: + static_assert(!std::is_reference_v<T>, "T must not be a reference"); + static_assert(!std::is_same_v<T, std::remove_cv_t<std::in_place_t>>, + "T must not be std::in_place_t"); + static_assert(!std::is_same_v<T, std::remove_cv_t<unexpect_t>>, "T must not be unexpect_t"); + static_assert(!std::is_same_v<T, std::remove_cv_t<Unexpected<E>>>, + "T must not be Unexpected<E>"); + static_assert(!std::is_reference_v<E>, "E must not be a reference"); + + T* valptr() { + return std::addressof(this->m_val); + } + + const T* valptr() const { + return std::addressof(this->m_val); + } + + Unexpected<E>* errptr() { + return std::addressof(this->m_unexpect); + } + + const Unexpected<E>* errptr() const { + return std::addressof(this->m_unexpect); + } + + template <typename U = T> + constexpr U& val() { + return this->m_val; + } + + template <typename U = T> + constexpr const U& val() const { + return this->m_val; + } + + constexpr Unexpected<E>& err() { + return this->m_unexpect; + } + + constexpr const Unexpected<E>& err() const { + return this->m_unexpect; + } + + using impl_base = detail::expected_move_assign_base<T, E>; + using ctor_base = detail::expected_default_ctor_base<T, E>; +}; + +template <typename T, typename E, typename U, typename F> +constexpr bool operator==(const Expected<T, E>& lhs, const Expected<U, F>& rhs) { + return (lhs.has_value() != rhs.has_value()) + ? false + : (!lhs.has_value() ? lhs.error() == rhs.error() : *lhs == *rhs); +} + +template <typename T, typename E, typename U, typename F> +constexpr bool operator!=(const Expected<T, E>& lhs, const Expected<U, F>& rhs) { + return !operator==(lhs, rhs); +} + +template <typename T, typename E, typename U> +constexpr bool operator==(const Expected<T, E>& x, const U& v) { + return x.has_value() ? *x == v : false; +} + +template <typename T, typename E, typename U> +constexpr bool operator==(const U& v, const Expected<T, E>& x) { + return x.has_value() ? *x == v : false; +} + +template <typename T, typename E, typename U> +constexpr bool operator!=(const Expected<T, E>& x, const U& v) { + return !operator==(x, v); +} + +template <typename T, typename E, typename U> +constexpr bool operator!=(const U& v, const Expected<T, E>& x) { + return !operator==(v, x); +} + +template <typename T, typename E> +constexpr bool operator==(const Expected<T, E>& x, const Unexpected<E>& e) { + return x.has_value() ? false : x.error() == e.value(); +} + +template <typename T, typename E> +constexpr bool operator==(const Unexpected<E>& e, const Expected<T, E>& x) { + return x.has_value() ? false : x.error() == e.value(); +} + +template <typename T, typename E> +constexpr bool operator!=(const Expected<T, E>& x, const Unexpected<E>& e) { + return !operator==(x, e); +} + +template <typename T, typename E> +constexpr bool operator!=(const Unexpected<E>& e, const Expected<T, E>& x) { + return !operator==(e, x); +} + +} // namespace Common diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 0e85a9c1d..c51c05b28 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -6,6 +6,7 @@ #include <chrono> #include <climits> #include <exception> +#include <stop_token> #include <thread> #include <vector> @@ -186,6 +187,10 @@ public: initialization_in_progress_suppress_logging = false; } + static void Start() { + instance->StartBackendThread(); + } + Impl(const Impl&) = delete; Impl& operator=(const Impl&) = delete; @@ -201,7 +206,7 @@ public: } void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, - const char* function, std::string message) { + const char* function, std::string&& message) { if (!filter.CheckMessage(log_class, log_level)) return; const Entry& entry = @@ -211,40 +216,41 @@ public: private: Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_) - : filter{filter_}, file_backend{file_backend_filename}, backend_thread{std::thread([this] { - Common::SetCurrentThreadName("yuzu:Log"); - Entry entry; - const auto write_logs = [this, &entry]() { - ForEachBackend([&entry](Backend& backend) { backend.Write(entry); }); - }; - while (true) { - entry = message_queue.PopWait(); - if (entry.final_entry) { - break; - } - write_logs(); - } - // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a - // case where a system is repeatedly spamming logs even on close. - int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100; - while (max_logs_to_write-- && message_queue.Pop(entry)) { - write_logs(); - } - })} {} + : filter{filter_}, file_backend{file_backend_filename} {} ~Impl() { StopBackendThread(); } + void StartBackendThread() { + backend_thread = std::thread([this] { + Common::SetCurrentThreadName("yuzu:Log"); + Entry entry; + const auto write_logs = [this, &entry]() { + ForEachBackend([&entry](Backend& backend) { backend.Write(entry); }); + }; + while (!stop.stop_requested()) { + entry = message_queue.PopWait(stop.get_token()); + if (entry.filename != nullptr) { + write_logs(); + } + } + // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a + // case where a system is repeatedly spamming logs even on close. + int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100; + while (max_logs_to_write-- && message_queue.Pop(entry)) { + write_logs(); + } + }); + } + void StopBackendThread() { - Entry stop_entry{}; - stop_entry.final_entry = true; - message_queue.Push(stop_entry); + stop.request_stop(); backend_thread.join(); } Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr, - const char* function, std::string message) const { + const char* function, std::string&& message) const { using std::chrono::duration_cast; using std::chrono::microseconds; using std::chrono::steady_clock; @@ -257,7 +263,6 @@ private: .line_num = line_nr, .function = function, .message = std::move(message), - .final_entry = false, }; } @@ -278,8 +283,9 @@ private: ColorConsoleBackend color_console_backend{}; FileBackend file_backend; + std::stop_source stop; std::thread backend_thread; - MPSCQueue<Entry> message_queue{}; + MPSCQueue<Entry, true> message_queue{}; std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; }; } // namespace @@ -288,6 +294,10 @@ void Initialize() { Impl::Initialize(); } +void Start() { + Impl::Start(); +} + void DisableLoggingInTests() { initialization_in_progress_suppress_logging = true; } diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index cb7839ee9..bf785f402 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -14,6 +14,8 @@ class Filter; /// Initializes the logging system. This should be the first thing called in main. void Initialize(); +void Start(); + void DisableLoggingInTests(); /** diff --git a/src/common/logging/log_entry.h b/src/common/logging/log_entry.h index dd6f44841..b28570071 100644 --- a/src/common/logging/log_entry.h +++ b/src/common/logging/log_entry.h @@ -22,7 +22,6 @@ struct Entry { unsigned int line_num = 0; std::string function; std::string message; - bool final_entry = false; }; } // namespace Common::Log diff --git a/src/common/math_util.h b/src/common/math_util.h index 4c38d8040..510c4e56d 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -48,8 +48,8 @@ struct Rectangle { } [[nodiscard]] Rectangle<T> Scale(const float s) const { - return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), - static_cast<T>(top + GetHeight() * s)}; + return Rectangle{left, top, static_cast<T>(static_cast<float>(left + GetWidth()) * s), + static_cast<T>(static_cast<float>(top + GetHeight()) * s)}; } }; diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 9dd5e3efb..3bcaa072f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -47,7 +47,9 @@ void LogSettings() { log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); - log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue()); + log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); + log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue()); + log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue()); log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); @@ -105,6 +107,55 @@ float Volume() { return values.volume.GetValue() / 100.0f; } +void UpdateRescalingInfo() { + const auto setup = values.resolution_setup.GetValue(); + auto& info = values.resolution_info; + info.downscale = false; + switch (setup) { + case ResolutionSetup::Res1_2X: + info.up_scale = 1; + info.down_shift = 1; + info.downscale = true; + break; + case ResolutionSetup::Res3_4X: + info.up_scale = 3; + info.down_shift = 2; + info.downscale = true; + break; + case ResolutionSetup::Res1X: + info.up_scale = 1; + info.down_shift = 0; + break; + case ResolutionSetup::Res2X: + info.up_scale = 2; + info.down_shift = 0; + break; + case ResolutionSetup::Res3X: + info.up_scale = 3; + info.down_shift = 0; + break; + case ResolutionSetup::Res4X: + info.up_scale = 4; + info.down_shift = 0; + break; + case ResolutionSetup::Res5X: + info.up_scale = 5; + info.down_shift = 0; + break; + case ResolutionSetup::Res6X: + info.up_scale = 6; + info.down_shift = 0; + break; + default: + UNREACHABLE(); + info.up_scale = 1; + info.down_shift = 0; + } + info.up_factor = static_cast<f32>(info.up_scale) / (1U << info.down_shift); + info.down_factor = static_cast<f32>(1U << info.down_shift) / info.up_scale; + info.active = info.up_scale != 1 || info.down_shift != 0; +} + void RestoreGlobalState(bool is_powered_on) { // If a game is running, DO NOT restore the global settings state if (is_powered_on) { diff --git a/src/common/settings.h b/src/common/settings.h index 9ff4cf85d..42f8b4a7d 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -52,6 +52,56 @@ enum class NvdecEmulation : u32 { GPU = 2, }; +enum class ResolutionSetup : u32 { + Res1_2X = 0, + Res3_4X = 1, + Res1X = 2, + Res2X = 3, + Res3X = 4, + Res4X = 5, + Res5X = 6, + Res6X = 7, +}; + +enum class ScalingFilter : u32 { + NearestNeighbor = 0, + Bilinear = 1, + Bicubic = 2, + Gaussian = 3, + ScaleForce = 4, + Fsr = 5, + LastFilter = Fsr, +}; + +enum class AntiAliasing : u32 { + None = 0, + Fxaa = 1, + LastAA = Fxaa, +}; + +struct ResolutionScalingInfo { + u32 up_scale{1}; + u32 down_shift{0}; + f32 up_factor{1.0f}; + f32 down_factor{1.0f}; + bool active{}; + bool downscale{}; + + s32 ScaleUp(s32 value) const { + if (value == 0) { + return 0; + } + return std::max((value * static_cast<s32>(up_scale)) >> static_cast<s32>(down_shift), 1); + } + + u32 ScaleUp(u32 value) const { + if (value == 0U) { + return 0U; + } + return std::max((value * up_scale) >> down_shift, 1U); + } +}; + /** The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -451,7 +501,10 @@ struct Values { "disable_shader_loop_safety_checks"}; Setting<int> vulkan_device{0, "vulkan_device"}; - Setting<u16> resolution_factor{1, "resolution_factor"}; + ResolutionScalingInfo resolution_info{}; + Setting<ResolutionSetup> resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; + Setting<ScalingFilter> scaling_filter{ScalingFilter::Bilinear, "scaling_filter"}; + Setting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. RangedSetting<FullscreenMode> fullscreen_mode{ @@ -462,7 +515,7 @@ struct Values { #endif FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"}; - RangedSetting<int> max_anisotropy{0, 0, 4, "max_anisotropy"}; + RangedSetting<int> max_anisotropy{0, 0, 5, "max_anisotropy"}; Setting<bool> use_speed_limit{true, "use_speed_limit"}; RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"}; Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"}; @@ -595,6 +648,8 @@ std::string GetTimeZoneString(); void LogSettings(); +void UpdateRescalingInfo(); + // Restore the global state of all applicable settings in the Values struct void RestoreGlobalState(bool is_powered_on); diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 063605b46..5d47b600d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -9,7 +9,6 @@ #include <dynarmic/interface/A32/a32.h> #include <dynarmic/interface/A64/a64.h> -#include <dynarmic/interface/exclusive_monitor.h> #include "common/common_types.h" #include "common/hash.h" #include "core/arm/arm_interface.h" diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 4fd15f111..4e73cc03a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -18,7 +18,6 @@ #include "core/core_timing.h" #include "core/hardware_properties.h" #include "core/hle/kernel/k_process.h" -#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/svc.h" #include "core/memory.h" diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp index ebd506121..a043e6735 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -8,7 +8,6 @@ #include "core/arm/dynarmic/arm_dynarmic_cp15.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" using Callback = Dynarmic::A32::Coprocessor::Callback; using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h index 7c7ede79e..f271b2070 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h @@ -4,7 +4,6 @@ #pragma once -#include <memory> #include <optional> #include <dynarmic/interface/A32/coprocessor.h> diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp index 9426a3edf..397d054a8 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cinttypes> -#include <memory> #include "core/arm/dynarmic/arm_exclusive_monitor.h" #include "core/memory.h" diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h index 73d41f223..265c4ecef 100644 --- a/src/core/arm/dynarmic/arm_exclusive_monitor.h +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -4,7 +4,6 @@ #pragma once -#include <memory> #include <unordered_map> #include <dynarmic/interface/exclusive_monitor.h> diff --git a/src/core/core.cpp b/src/core/core.cpp index c3a0f9dae..07448fd29 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -19,20 +19,16 @@ #include "core/cpu_manager.h" #include "core/device_memory.h" #include "core/file_sys/bis_factory.h" -#include "core/file_sys/card_image.h" #include "core/file_sys/mode.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/registered_cache.h" #include "core/file_sys/romfs_factory.h" #include "core/file_sys/savedata_factory.h" -#include "core/file_sys/sdmc_factory.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" #include "core/hardware_interrupt_manager.h" -#include "core/hle/kernel/k_client_port.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/service/am/applets/applets.h" @@ -328,8 +324,8 @@ struct System::Impl { time_manager.Shutdown(); core_timing.Shutdown(); app_loader.reset(); - perf_stats.reset(); gpu_core.reset(); + perf_stats.reset(); kernel.Shutdown(); memory.Reset(); applet_manager.ClearAll(); @@ -353,7 +349,7 @@ struct System::Impl { } Service::Glue::ApplicationLaunchProperty launch{}; - launch.title_id = process.GetTitleID(); + launch.title_id = process.GetProgramID(); FileSys::PatchManager pm{launch.title_id, fs_controller, *content_provider}; launch.version = pm.GetGameVersion().value_or(0); @@ -643,6 +639,10 @@ const Core::SpeedLimiter& System::SpeedLimiter() const { return impl->speed_limiter; } +u64 System::GetCurrentProcessProgramID() const { + return impl->kernel.CurrentProcess()->GetProgramID(); +} + Loader::ResultStatus System::GetGameName(std::string& out) const { return impl->GetGameName(out); } diff --git a/src/core/core.h b/src/core/core.h index 1cfe1bba6..01bc0a2c7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -297,6 +297,8 @@ public: /// Provides a constant reference to the speed limiter [[nodiscard]] const Core::SpeedLimiter& SpeedLimiter() const; + [[nodiscard]] u64 GetCurrentProcessProgramID() const; + /// Gets the name of the current game [[nodiscard]] Loader::ResultStatus GetGameName(std::string& out) const; diff --git a/src/core/core_timing.h b/src/core/core_timing.h index b64caacda..888828fd0 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -8,7 +8,6 @@ #include <chrono> #include <functional> #include <memory> -#include <mutex> #include <optional> #include <string> #include <thread> diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 77efcabf0..5d43c6e5d 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -6,7 +6,6 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/thread.h" -#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/cpu_manager.h" diff --git a/src/core/crypto/ctr_encryption_layer.cpp b/src/core/crypto/ctr_encryption_layer.cpp index 1231da8e3..3a2af4f50 100644 --- a/src/core/crypto/ctr_encryption_layer.cpp +++ b/src/core/crypto/ctr_encryption_layer.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <cstring> -#include "common/assert.h" #include "core/crypto/ctr_encryption_layer.h" namespace Core::Crypto { diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index a98daed89..9244907b5 100644 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp @@ -10,14 +10,12 @@ #include <locale> #include <map> #include <sstream> -#include <string_view> #include <tuple> #include <vector> #include <mbedtls/bignum.h> #include <mbedtls/cipher.h> #include <mbedtls/cmac.h> #include <mbedtls/sha256.h> -#include "common/common_funcs.h" #include "common/fs/file.h" #include "common/fs/fs.h" #include "common/fs/path_util.h" @@ -30,7 +28,6 @@ #include "core/crypto/partition_data_manager.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" -#include "core/file_sys/partition_filesystem.h" #include "core/file_sys/registered_cache.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/loader.h" diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h index e771625e1..ac1eb8962 100644 --- a/src/core/crypto/key_manager.h +++ b/src/core/crypto/key_manager.h @@ -15,7 +15,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "core/crypto/partition_data_manager.h" -#include "core/file_sys/vfs_types.h" namespace Common::FS { class IOFile; diff --git a/src/core/crypto/partition_data_manager.cpp b/src/core/crypto/partition_data_manager.cpp index 5f1c86a09..d18252a54 100644 --- a/src/core/crypto/partition_data_manager.cpp +++ b/src/core/crypto/partition_data_manager.cpp @@ -12,7 +12,6 @@ #include <cctype> #include <cstring> #include <mbedtls/sha256.h> -#include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/hex_util.h" diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp index 8f0ba4ee7..c2b7ea309 100644 --- a/src/core/crypto/xts_encryption_layer.cpp +++ b/src/core/crypto/xts_encryption_layer.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <cstring> -#include "common/assert.h" #include "core/crypto/xts_encryption_layer.h" namespace Core::Crypto { diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index 755d3303e..c6300be59 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -14,7 +14,6 @@ #include "core/file_sys/nca_metadata.h" #include "core/file_sys/partition_filesystem.h" #include "core/file_sys/submission_package.h" -#include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_offset.h" #include "core/file_sys/vfs_vector.h" #include "core/loader/loader.h" diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h index af2b723df..3e0b45630 100644 --- a/src/core/file_sys/control_metadata.h +++ b/src/core/file_sys/control_metadata.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <memory> #include <string> #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/core/file_sys/directory.h b/src/core/file_sys/directory.h index 0d73eecc9..21c7aefc8 100644 --- a/src/core/file_sys/directory.h +++ b/src/core/file_sys/directory.h @@ -6,7 +6,6 @@ #include <cstddef> #include <iterator> -#include <string_view> #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/core/file_sys/nca_metadata.h b/src/core/file_sys/nca_metadata.h index ce1138a17..75c74ae28 100644 --- a/src/core/file_sys/nca_metadata.h +++ b/src/core/file_sys/nca_metadata.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <memory> #include <vector> #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index 35a53d36c..4e46c24cf 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp @@ -53,13 +53,16 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) { } /*static*/ ProgramMetadata ProgramMetadata::GetDefault() { + // Allow use of cores 0~3 and thread priorities 1~63. + constexpr u32 default_thread_info_capability = 0x30007F7; + ProgramMetadata result; result.LoadManual( true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/, 0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/, 0 /*title_id*/, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, - 0x1FE00000 /*system_resource_size*/, {} /*capabilities*/); + 0x1FE00000 /*system_resource_size*/, {default_thread_info_capability} /*capabilities*/); return result; } diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp index 638c6cea8..291b746b6 100644 --- a/src/core/file_sys/romfs_factory.cpp +++ b/src/core/file_sys/romfs_factory.cpp @@ -6,7 +6,6 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" -#include "core/file_sys/card_image.h" #include "core/file_sys/common_funcs.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" @@ -39,13 +38,12 @@ void RomFSFactory::SetPackedUpdate(VirtualFile update_raw_file) { ResultVal<VirtualFile> RomFSFactory::OpenCurrentProcess(u64 current_process_title_id) const { if (!updatable) { - return MakeResult<VirtualFile>(file); + return file; } const PatchManager patch_manager{current_process_title_id, filesystem_controller, content_provider}; - return MakeResult<VirtualFile>( - patch_manager.PatchRomFS(file, ivfc_offset, ContentRecordType::Program, update_raw)); + return patch_manager.PatchRomFS(file, ivfc_offset, ContentRecordType::Program, update_raw); } ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFS(u64 title_id, ContentRecordType type) const { @@ -58,8 +56,7 @@ ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFS(u64 title_id, ContentRecor const PatchManager patch_manager{title_id, filesystem_controller, content_provider}; - return MakeResult<VirtualFile>( - patch_manager.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), type)); + return patch_manager.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), type); } ResultVal<VirtualFile> RomFSFactory::OpenPatchedRomFSWithProgramIndex( @@ -83,7 +80,7 @@ ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, return ResultUnknown; } - return MakeResult<VirtualFile>(romfs); + return romfs; } std::shared_ptr<NCA> RomFSFactory::GetEntry(u64 title_id, StorageId storage, diff --git a/src/core/file_sys/romfs_factory.h b/src/core/file_sys/romfs_factory.h index 39db09e4e..2c93a49a5 100644 --- a/src/core/file_sys/romfs_factory.h +++ b/src/core/file_sys/romfs_factory.h @@ -5,8 +5,9 @@ #pragma once #include <memory> + #include "common/common_types.h" -#include "core/file_sys/vfs.h" +#include "core/file_sys/vfs_types.h" #include "core/hle/result.h" namespace Loader { diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp index b5254dd75..e6f8514c9 100644 --- a/src/core/file_sys/savedata_factory.cpp +++ b/src/core/file_sys/savedata_factory.cpp @@ -9,7 +9,6 @@ #include "core/core.h" #include "core/file_sys/savedata_factory.h" #include "core/file_sys/vfs.h" -#include "core/hle/kernel/k_process.h" namespace FileSys { @@ -94,7 +93,7 @@ ResultVal<VirtualDir> SaveDataFactory::Create(SaveDataSpaceId space, return ResultUnknown; } - return MakeResult<VirtualDir>(std::move(out)); + return out; } ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, @@ -115,7 +114,7 @@ ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, return ResultUnknown; } - return MakeResult<VirtualDir>(std::move(out)); + return out; } VirtualDir SaveDataFactory::GetSaveDataSpaceDirectory(SaveDataSpaceId space) const { @@ -143,7 +142,7 @@ std::string SaveDataFactory::GetFullPath(Core::System& system, SaveDataSpaceId s // be interpreted as the title id of the current process. if (type == SaveDataType::SaveData || type == SaveDataType::DeviceSaveData) { if (title_id == 0) { - title_id = system.CurrentProcess()->GetTitleID(); + title_id = system.GetCurrentProcessProgramID(); } } diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h index 1d8dc981f..de415b0c4 100644 --- a/src/core/file_sys/savedata_factory.h +++ b/src/core/file_sys/savedata_factory.h @@ -8,7 +8,6 @@ #include <string> #include "common/common_funcs.h" #include "common/common_types.h" -#include "common/swap.h" #include "core/file_sys/vfs.h" #include "core/hle/result.h" diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp index e5c72cd4d..c0e13e56f 100644 --- a/src/core/file_sys/sdmc_factory.cpp +++ b/src/core/file_sys/sdmc_factory.cpp @@ -25,7 +25,7 @@ SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_) SDMCFactory::~SDMCFactory() = default; ResultVal<VirtualDir> SDMCFactory::Open() const { - return MakeResult<VirtualDir>(sd_dir); + return sd_dir; } VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const { diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp index f192dffa5..f03124e3d 100644 --- a/src/core/file_sys/submission_package.cpp +++ b/src/core/file_sys/submission_package.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <cstring> -#include <string_view> #include <fmt/ostream.h> diff --git a/src/core/file_sys/system_archive/time_zone_binary.h b/src/core/file_sys/system_archive/time_zone_binary.h index ed2b78227..266c23537 100644 --- a/src/core/file_sys/system_archive/time_zone_binary.h +++ b/src/core/file_sys/system_archive/time_zone_binary.h @@ -4,7 +4,6 @@ #pragma once -#include <string> #include "core/file_sys/vfs_types.h" namespace FileSys::SystemArchive { diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h index ff6935da6..3e625fad6 100644 --- a/src/core/file_sys/vfs.h +++ b/src/core/file_sys/vfs.h @@ -9,7 +9,6 @@ #include <memory> #include <optional> #include <string> -#include <string_view> #include <type_traits> #include <vector> diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index cd32960a5..bd091451e 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -6,7 +6,6 @@ #include <map> #include <memory> -#include <string_view> #include "core/file_sys/vfs.h" namespace FileSys { diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h index 42f78b3d9..7ce1eb336 100644 --- a/src/core/file_sys/vfs_offset.h +++ b/src/core/file_sys/vfs_offset.h @@ -5,7 +5,6 @@ #pragma once #include <memory> -#include <string_view> #include "core/file_sys/vfs.h" diff --git a/src/core/frontend/applets/general_frontend.h b/src/core/frontend/applets/general_frontend.h index b713b14ee..1647aa975 100644 --- a/src/core/frontend/applets/general_frontend.h +++ b/src/core/frontend/applets/general_frontend.h @@ -5,7 +5,6 @@ #pragma once #include <functional> -#include <optional> #include "common/common_types.h" namespace Core::Frontend { diff --git a/src/core/frontend/applets/software_keyboard.cpp b/src/core/frontend/applets/software_keyboard.cpp index 12c76c9ee..c4863ee73 100644 --- a/src/core/frontend/applets/software_keyboard.cpp +++ b/src/core/frontend/applets/software_keyboard.cpp @@ -16,7 +16,8 @@ DefaultSoftwareKeyboardApplet::~DefaultSoftwareKeyboardApplet() = default; void DefaultSoftwareKeyboardApplet::InitializeKeyboard( bool is_inline, KeyboardInitializeParameters initialize_parameters, - std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> submit_normal_callback_, + std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> + submit_normal_callback_, std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback_) { if (is_inline) { @@ -128,7 +129,7 @@ void DefaultSoftwareKeyboardApplet::ExitKeyboard() const { } void DefaultSoftwareKeyboardApplet::SubmitNormalText(std::u16string text) const { - submit_normal_callback(Service::AM::Applets::SwkbdResult::Ok, text); + submit_normal_callback(Service::AM::Applets::SwkbdResult::Ok, text, true); } void DefaultSoftwareKeyboardApplet::SubmitInlineText(std::u16string_view text) const { diff --git a/src/core/frontend/applets/software_keyboard.h b/src/core/frontend/applets/software_keyboard.h index 228a548d4..490c55cc2 100644 --- a/src/core/frontend/applets/software_keyboard.h +++ b/src/core/frontend/applets/software_keyboard.h @@ -5,7 +5,6 @@ #pragma once #include <functional> -#include <thread> #include "common/common_types.h" @@ -58,7 +57,7 @@ public: virtual void InitializeKeyboard( bool is_inline, KeyboardInitializeParameters initialize_parameters, - std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> + std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> submit_normal_callback_, std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback_) = 0; @@ -83,7 +82,7 @@ public: void InitializeKeyboard( bool is_inline, KeyboardInitializeParameters initialize_parameters, - std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> + std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> submit_normal_callback_, std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback_) override; @@ -107,7 +106,7 @@ private: KeyboardInitializeParameters parameters; - mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> + mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> submit_normal_callback; mutable std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback; diff --git a/src/core/frontend/applets/web_browser.h b/src/core/frontend/applets/web_browser.h index 915dde677..b6a60c994 100644 --- a/src/core/frontend/applets/web_browser.h +++ b/src/core/frontend/applets/web_browser.h @@ -5,7 +5,6 @@ #pragma once #include <functional> -#include <string_view> #include "core/hle/service/am/applets/applet_web_browser_types.h" diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index e11ec0b0b..e1f7e5886 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cmath> #include <mutex> #include "common/settings.h" #include "core/frontend/emu_window.h" diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 076148698..8a86a1d27 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -5,7 +5,6 @@ #pragma once #include <memory> -#include <tuple> #include <utility> #include "common/common_types.h" #include "core/frontend/framebuffer_layout.h" diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 0832463d6..4b58b672a 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { return res; } -FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { - u32 width, height; +FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) { + const bool is_docked = Settings::values.use_docked_mode.GetValue(); + const u32 screen_width = is_docked ? ScreenDocked::Width : ScreenUndocked::Width; + const u32 screen_height = is_docked ? ScreenDocked::Height : ScreenUndocked::Height; - if (Settings::values.use_docked_mode.GetValue()) { - width = ScreenDocked::Width * res_scale; - height = ScreenDocked::Height * res_scale; - } else { - width = ScreenUndocked::Width * res_scale; - height = ScreenUndocked::Height * res_scale; - } + const u32 width = static_cast<u32>(static_cast<f32>(screen_width) * res_scale); + const u32 height = static_cast<u32>(static_cast<f32>(screen_height) * res_scale); return DefaultFrameLayout(width, height); } diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index e2e3bbbb3..2e36c0163 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); * Convenience method to get frame layout by resolution scale * @param res_scale resolution scale factor */ -FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); +FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale); /** * Convenience method to determine emulation aspect ratio diff --git a/src/core/hle/kernel/board/nintendo/nx/secure_monitor.h b/src/core/hle/kernel/board/nintendo/nx/secure_monitor.h index 0c366b252..f77a91dec 100644 --- a/src/core/hle/kernel/board/nintendo/nx/secure_monitor.h +++ b/src/core/hle/kernel/board/nintendo/nx/secure_monitor.h @@ -4,8 +4,6 @@ #pragma once -#include "common/common_types.h" - namespace Kernel::Board::Nintendo::Nx::Smc { enum MemorySize { diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index d8ad54030..5cc3b9829 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h @@ -5,7 +5,6 @@ #pragma once #include <cstddef> -#include <vector> #include "common/common_types.h" #include "core/hle/kernel/physical_memory.h" diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index cee96dd9b..e19544c54 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <array> #include <sstream> -#include <utility> #include <boost/range/algorithm_ext/erase.hpp> @@ -19,14 +18,9 @@ #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" -#include "core/hle/kernel/k_scheduler.h" -#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/svc_results.h" -#include "core/hle/kernel/time_manager.h" #include "core/memory.h" namespace Kernel { diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp index 10edede17..8ff0f695d 100644 --- a/src/core/hle/kernel/init/init_slab_setup.cpp +++ b/src/core/hle/kernel/init/init_slab_setup.cpp @@ -20,8 +20,6 @@ #include "core/hle/kernel/k_system_control.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/k_transfer_memory.h" -#include "core/hle/kernel/memory_types.h" -#include "core/memory.h" namespace Kernel::Init { diff --git a/src/core/hle/kernel/k_auto_object_container.h b/src/core/hle/kernel/k_auto_object_container.h index 459953450..4eadfe99d 100644 --- a/src/core/hle/kernel/k_auto_object_container.h +++ b/src/core/hle/kernel/k_auto_object_container.h @@ -4,14 +4,9 @@ #pragma once -#include <atomic> - #include <boost/intrusive/rbtree.hpp> -#include "common/assert.h" #include "common/common_funcs.h" -#include "common/common_types.h" -#include "common/intrusive_red_black_tree.h" #include "core/hle/kernel/k_auto_object.h" #include "core/hle/kernel/k_light_lock.h" diff --git a/src/core/hle/kernel/k_class_token.h b/src/core/hle/kernel/k_class_token.h index c28db49ec..980010150 100644 --- a/src/core/hle/kernel/k_class_token.h +++ b/src/core/hle/kernel/k_class_token.h @@ -6,7 +6,6 @@ #include <atomic> -#include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" diff --git a/src/core/hle/kernel/k_client_session.cpp b/src/core/hle/kernel/k_client_session.cpp index 8ad1be762..242582f8f 100644 --- a/src/core/hle/kernel/k_client_session.cpp +++ b/src/core/hle/kernel/k_client_session.cpp @@ -7,7 +7,6 @@ #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/k_session.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/svc_results.h" #include "core/hle/result.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_client_session.h b/src/core/hle/kernel/k_client_session.h index 230e3b6b8..ad6cc4ed1 100644 --- a/src/core/hle/kernel/k_client_session.h +++ b/src/core/hle/kernel/k_client_session.h @@ -4,11 +4,9 @@ #pragma once -#include <memory> #include <string> #include "core/hle/kernel/k_auto_object.h" -#include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/slab_helpers.h" #include "core/hle/result.h" diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index ef14ad1d2..7fa9b8cc3 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <vector> - #include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/hle/kernel/k_condition_variable.h" diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 2ff6aa160..95ec905ae 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/bit_field.h" -#include "common/bit_util.h" #include "common/common_types.h" #include "core/hle/kernel/k_auto_object.h" #include "core/hle/kernel/k_spin_lock.h" diff --git a/src/core/hle/kernel/k_light_condition_variable.h b/src/core/hle/kernel/k_light_condition_variable.h index a95fa41f3..fb0ad783a 100644 --- a/src/core/hle/kernel/k_light_condition_variable.h +++ b/src/core/hle/kernel/k_light_condition_variable.h @@ -10,7 +10,6 @@ #include "common/common_types.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" -#include "core/hle/kernel/k_thread_queue.h" #include "core/hle/kernel/time_manager.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_light_lock.h b/src/core/hle/kernel/k_light_lock.h index f4c45f76a..ad853661d 100644 --- a/src/core/hle/kernel/k_light_lock.h +++ b/src/core/hle/kernel/k_light_lock.h @@ -6,7 +6,6 @@ #include <atomic> -#include "common/common_types.h" #include "core/hle/kernel/k_scoped_lock.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index ac840b3d0..39badc5f1 100644 --- a/src/core/hle/kernel/k_memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h @@ -8,7 +8,6 @@ #include <mutex> #include <tuple> -#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hle/kernel/k_page_heap.h" #include "core/hle/result.h" diff --git a/src/core/hle/kernel/k_page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp index 07e062922..29d996d62 100644 --- a/src/core/hle/kernel/k_page_heap.cpp +++ b/src/core/hle/kernel/k_page_heap.cpp @@ -4,7 +4,6 @@ #include "core/core.h" #include "core/hle/kernel/k_page_heap.h" -#include "core/memory.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h index de5d6a189..8d9f30523 100644 --- a/src/core/hle/kernel/k_page_heap.h +++ b/src/core/hle/kernel/k_page_heap.h @@ -5,12 +5,9 @@ #pragma once #include <array> -#include <bit> #include <vector> #include "common/alignment.h" -#include "common/assert.h" -#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hle/kernel/k_page_bitmap.h" #include "core/hle/kernel/memory_types.h" diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 5e0b620c2..526b87241 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -859,7 +859,7 @@ ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) { current_heap_addr = heap_region_start + size; } - return MakeResult<VAddr>(heap_region_start); + return heap_region_start; } ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, @@ -893,7 +893,7 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, block_manager->Update(addr, needed_num_pages, state, perm); - return MakeResult<VAddr>(addr); + return addr; } ResultCode KPageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { diff --git a/src/core/hle/kernel/k_port.h b/src/core/hle/kernel/k_port.h index 4018ea2df..b6e4a1fcd 100644 --- a/src/core/hle/kernel/k_port.h +++ b/src/core/hle/kernel/k_port.h @@ -4,7 +4,6 @@ #pragma once -#include <memory> #include <string> #include "common/common_types.h" diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 1a53e2be7..8a8c1fcbb 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -8,7 +8,6 @@ #include <cstddef> #include <list> #include <string> -#include <unordered_map> #include <vector> #include "common/common_types.h" #include "core/hle/kernel/k_address_arbiter.h" @@ -155,8 +154,8 @@ public: return process_id; } - /// Gets the title ID corresponding to this process. - u64 GetTitleID() const { + /// Gets the program ID corresponding to this process. + u64 GetProgramID() const { return program_id; } diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h index 47e315555..c571f2992 100644 --- a/src/core/hle/kernel/k_scheduler_lock.h +++ b/src/core/hle/kernel/k_scheduler_lock.h @@ -5,7 +5,6 @@ #pragma once #include "common/assert.h" -#include "core/hardware_properties.h" #include "core/hle/kernel/k_spin_lock.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" diff --git a/src/core/hle/kernel/k_scoped_lock.h b/src/core/hle/kernel/k_scoped_lock.h index 4fb180fc6..89a7ffe49 100644 --- a/src/core/hle/kernel/k_scoped_lock.h +++ b/src/core/hle/kernel/k_scoped_lock.h @@ -7,7 +7,8 @@ #pragma once -#include "common/common_types.h" +#include <concepts> +#include <type_traits> namespace Kernel { diff --git a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h index f6c75f2d9..61dc2858f 100644 --- a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h +++ b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h @@ -8,7 +8,6 @@ #pragma once #include "common/common_types.h" -#include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/time_manager.h" diff --git a/src/core/hle/kernel/k_server_port.cpp b/src/core/hle/kernel/k_server_port.cpp index c5dc58387..433fc98e1 100644 --- a/src/core/hle/kernel/k_server_port.cpp +++ b/src/core/hle/kernel/k_server_port.cpp @@ -10,7 +10,6 @@ #include "core/hle/kernel/k_server_port.h" #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/svc_results.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h index 67a36da40..6302d5e61 100644 --- a/src/core/hle/kernel/k_server_port.h +++ b/src/core/hle/kernel/k_server_port.h @@ -7,14 +7,11 @@ #include <memory> #include <string> #include <utility> -#include <vector> #include <boost/intrusive/list.hpp> -#include "common/common_types.h" #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/k_synchronization_object.h" -#include "core/hle/result.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp index b9f24475c..2bd53ccbd 100644 --- a/src/core/hle/kernel/k_server_session.cpp +++ b/src/core/hle/kernel/k_server_session.cpp @@ -14,7 +14,6 @@ #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/k_client_port.h" #include "core/hle/kernel/k_handle_table.h" -#include "core/hle/kernel/k_port.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_server_port.h" @@ -22,6 +21,7 @@ #include "core/hle/kernel/k_session.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/service_thread.h" #include "core/memory.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h index d44bc9d4f..5b76bf17c 100644 --- a/src/core/hle/kernel/k_server_session.h +++ b/src/core/hle/kernel/k_server_session.h @@ -7,14 +7,11 @@ #include <memory> #include <string> #include <utility> -#include <vector> #include <boost/intrusive/list.hpp> -#include "common/threadsafe_queue.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/k_synchronization_object.h" -#include "core/hle/kernel/service_thread.h" #include "core/hle/result.h" namespace Core::Memory { diff --git a/src/core/hle/kernel/k_session.cpp b/src/core/hle/kernel/k_session.cpp index 940878e03..a64b56b9e 100644 --- a/src/core/hle/kernel/k_session.cpp +++ b/src/core/hle/kernel/k_session.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/assert.h" #include "core/hle/kernel/k_client_port.h" #include "core/hle/kernel/k_client_session.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" diff --git a/src/core/hle/kernel/k_shared_memory.h b/src/core/hle/kernel/k_shared_memory.h index e9815f90b..81de36136 100644 --- a/src/core/hle/kernel/k_shared_memory.h +++ b/src/core/hle/kernel/k_shared_memory.h @@ -4,7 +4,6 @@ #pragma once -#include <memory> #include <string> #include "common/common_types.h" diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h index bf97a0184..20bc19f46 100644 --- a/src/core/hle/kernel/k_shared_memory_info.h +++ b/src/core/hle/kernel/k_shared_memory_info.h @@ -4,12 +4,8 @@ #pragma once -#include <memory> -#include <string> - #include <boost/intrusive/list.hpp> -#include "common/assert.h" #include "core/hle/kernel/slab_helpers.h" namespace Kernel { diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 9f1d3156b..db65ce79a 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -13,8 +13,6 @@ #include "common/common_types.h" #include "common/fiber.h" #include "common/logging/log.h" -#include "common/scope_exit.h" -#include "common/thread_queue_list.h" #include "core/core.h" #include "core/cpu_manager.h" #include "core/hardware_properties.h" @@ -31,11 +29,9 @@ #include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" -#include "core/memory.h" #ifdef ARCHITECTURE_x86_64 #include "core/arm/dynarmic/arm_dynarmic_32.h" -#include "core/arm/dynarmic/arm_dynarmic_64.h" #endif namespace { diff --git a/src/core/hle/kernel/k_trace.h b/src/core/hle/kernel/k_trace.h index 79391bccb..d3fed1888 100644 --- a/src/core/hle/kernel/k_trace.h +++ b/src/core/hle/kernel/k_trace.h @@ -4,8 +4,6 @@ #pragma once -#include "common/common_funcs.h" - namespace Kernel { using namespace Common::Literals; diff --git a/src/core/hle/kernel/k_transfer_memory.h b/src/core/hle/kernel/k_transfer_memory.h index 31029a5c2..cb7521823 100644 --- a/src/core/hle/kernel/k_transfer_memory.h +++ b/src/core/hle/kernel/k_transfer_memory.h @@ -4,8 +4,6 @@ #pragma once -#include <memory> - #include "core/hle/kernel/slab_helpers.h" #include "core/hle/kernel/svc_types.h" #include "core/hle/result.h" diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4a139c5e7..e42a6d36f 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -39,9 +39,7 @@ #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/service_thread.h" -#include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/time_manager.h" -#include "core/hle/lock.h" #include "core/hle/result.h" #include "core/hle/service/sm/sm.h" #include "core/memory.h" diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 901f7e3b0..16a032e89 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -4,7 +4,6 @@ #pragma once -#include <array> #include <cstddef> #include <memory> diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp index 2ae80beca..6721b6276 100644 --- a/src/core/hle/kernel/service_thread.cpp +++ b/src/core/hle/kernel/service_thread.cpp @@ -9,15 +9,11 @@ #include <vector> #include <queue> -#include "common/assert.h" #include "common/scope_exit.h" #include "common/thread.h" -#include "core/core.h" #include "core/hle/kernel/k_session.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/service_thread.h" -#include "core/hle/lock.h" -#include "video_core/renderer_base.h" namespace Kernel { diff --git a/src/core/hle/kernel/slab_helpers.h b/src/core/hle/kernel/slab_helpers.h index 0c5995db0..f1c11256e 100644 --- a/src/core/hle/kernel/slab_helpers.h +++ b/src/core/hle/kernel/slab_helpers.h @@ -4,16 +4,8 @@ #pragma once -#include <atomic> - -#include "common/assert.h" -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "common/intrusive_red_black_tree.h" #include "core/hle/kernel/k_auto_object.h" #include "core/hle/kernel/k_auto_object_container.h" -#include "core/hle/kernel/k_light_lock.h" -#include "core/hle/kernel/k_slab_heap.h" #include "core/hle/kernel/kernel.h" namespace Kernel { diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c43135856..f9d99bc51 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -13,18 +13,11 @@ #include "common/common_funcs.h" #include "common/fiber.h" #include "common/logging/log.h" -#include "common/microprofile.h" #include "common/scope_exit.h" -#include "common/string_util.h" -#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" -#include "core/cpu_manager.h" -#include "core/hle/kernel/k_address_arbiter.h" #include "core/hle/kernel/k_client_port.h" #include "core/hle/kernel/k_client_session.h" -#include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_handle_table.h" #include "core/hle/kernel/k_memory_block.h" @@ -35,7 +28,6 @@ #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" -#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/k_thread.h" @@ -47,10 +39,8 @@ #include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/svc_types.h" #include "core/hle/kernel/svc_wrap.h" -#include "core/hle/kernel/time_manager.h" #include "core/hle/lock.h" #include "core/hle/result.h" -#include "core/hle/service/service.h" #include "core/memory.h" #include "core/reporter.h" @@ -409,7 +399,7 @@ static ResultCode GetProcessId32(Core::System& system, u32* out_process_id_low, /// Wait for the given handles to synchronize, timeout after the specified nanoseconds static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr handles_address, - u64 num_handles, s64 nano_seconds) { + s32 num_handles, s64 nano_seconds) { LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, num_handles={}, nano_seconds={}", handles_address, num_handles, nano_seconds); @@ -434,7 +424,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha // Ensure handles are closed when we're done. SCOPE_EXIT({ - for (u64 i = 0; i < num_handles; ++i) { + for (s32 i = 0; i < num_handles; ++i) { kernel.UnregisterInUseObject(objs[i]); objs[i]->Close(); } @@ -778,7 +768,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, Handle return ResultSuccess; case GetInfoType::TitleId: - *result = process->GetTitleID(); + *result = process->GetProgramID(); return ResultSuccess; case GetInfoType::UserExceptionContextAddr: diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 913b16494..6e62e656f 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -248,10 +248,10 @@ void SvcWrap64(Core::System& system) { } // Used by WaitSynchronization -template <ResultCode func(Core::System&, s32*, u64, u64, s64)> +template <ResultCode func(Core::System&, s32*, u64, s32, s64)> void SvcWrap64(Core::System& system) { s32 param_1 = 0; - const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<u32>(Param(system, 2)), + const u32 retval = func(system, ¶m_1, Param(system, 1), static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) .raw; diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index ae9b4be2f..8cd7279a3 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -5,10 +5,7 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" -#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/time_manager.h" namespace Kernel { diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h index 2d175a9c4..b1fa26e8c 100644 --- a/src/core/hle/kernel/time_manager.h +++ b/src/core/hle/kernel/time_manager.h @@ -6,7 +6,6 @@ #include <memory> #include <mutex> -#include <unordered_map> namespace Core { class System; diff --git a/src/core/hle/result.h b/src/core/hle/result.h index 2c6b24848..3807b9aa8 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h @@ -4,11 +4,10 @@ #pragma once -#include <new> -#include <utility> #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" +#include "common/expected.h" // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes @@ -155,203 +154,130 @@ constexpr ResultCode ResultSuccess(0); constexpr ResultCode ResultUnknown(UINT32_MAX); /** - * This is an optional value type. It holds a `ResultCode` and, if that code is a success code, - * also holds a result of type `T`. If the code is an error code then trying to access the inner - * value fails, thus ensuring that the ResultCode of functions is always checked properly before - * their return value is used. It is similar in concept to the `std::optional` type - * (http://en.cppreference.com/w/cpp/experimental/optional) originally proposed for inclusion in - * C++14, or the `Result` type in Rust (http://doc.rust-lang.org/std/result/index.html). + * This is an optional value type. It holds a `ResultCode` and, if that code is ResultSuccess, it + * also holds a result of type `T`. If the code is an error code (not ResultSuccess), then trying + * to access the inner value with operator* is undefined behavior and will assert with Unwrap(). + * Users of this class must be cognizant to check the status of the ResultVal with operator bool(), + * Code(), Succeeded() or Failed() prior to accessing the inner value. * * An example of how it could be used: * \code * ResultVal<int> Frobnicate(float strength) { * if (strength < 0.f || strength > 1.0f) { * // Can't frobnicate too weakly or too strongly - * return ResultCode(ErrorDescription::OutOfRange, ErrorModule::Common, - * ErrorSummary::InvalidArgument, ErrorLevel::Permanent); + * return ResultCode{ErrorModule::Common, 1}; * } else { * // Frobnicated! Give caller a cookie - * return MakeResult<int>(42); + * return 42; * } * } * \endcode * * \code - * ResultVal<int> frob_result = Frobnicate(0.75f); + * auto frob_result = Frobnicate(0.75f); * if (frob_result) { * // Frobbed ok * printf("My cookie is %d\n", *frob_result); * } else { - * printf("Guess I overdid it. :( Error code: %ux\n", frob_result.code().hex); + * printf("Guess I overdid it. :( Error code: %ux\n", frob_result.Code().raw); * } * \endcode */ template <typename T> class ResultVal { public: - /// Constructs an empty `ResultVal` with the given error code. The code must not be a success - /// code. - ResultVal(ResultCode error_code = ResultUnknown) : result_code(error_code) { - ASSERT(error_code.IsError()); - } + constexpr ResultVal() : expected{} {} + + constexpr ResultVal(ResultCode code) : expected{Common::Unexpected(code)} {} + + template <typename U> + constexpr ResultVal(U&& val) : expected{std::forward<U>(val)} {} - /** - * Similar to the non-member function `MakeResult`, with the exception that you can manually - * specify the success code. `success_code` must not be an error code. - */ template <typename... Args> - [[nodiscard]] static ResultVal WithCode(ResultCode success_code, Args&&... args) { - ResultVal<T> result; - result.emplace(success_code, std::forward<Args>(args)...); - return result; - } + constexpr ResultVal(Args&&... args) : expected{std::in_place, std::forward<Args>(args)...} {} - ResultVal(const ResultVal& o) noexcept : result_code(o.result_code) { - if (!o.empty()) { - new (&object) T(o.object); - } - } + ~ResultVal() = default; - ResultVal(ResultVal&& o) noexcept : result_code(o.result_code) { - if (!o.empty()) { - new (&object) T(std::move(o.object)); - } - } + constexpr ResultVal(const ResultVal&) = default; + constexpr ResultVal(ResultVal&&) = default; - ~ResultVal() { - if (!empty()) { - object.~T(); - } - } + ResultVal& operator=(const ResultVal&) = default; + ResultVal& operator=(ResultVal&&) = default; - ResultVal& operator=(const ResultVal& o) noexcept { - if (this == &o) { - return *this; - } - if (!empty()) { - if (!o.empty()) { - object = o.object; - } else { - object.~T(); - } - } else { - if (!o.empty()) { - new (&object) T(o.object); - } - } - result_code = o.result_code; - - return *this; + [[nodiscard]] constexpr explicit operator bool() const noexcept { + return expected.has_value(); } - ResultVal& operator=(ResultVal&& o) noexcept { - if (this == &o) { - return *this; - } - if (!empty()) { - if (!o.empty()) { - object = std::move(o.object); - } else { - object.~T(); - } - } else { - if (!o.empty()) { - new (&object) T(std::move(o.object)); - } - } - result_code = o.result_code; - - return *this; + [[nodiscard]] constexpr ResultCode Code() const { + return expected.has_value() ? ResultSuccess : expected.error(); } - /** - * Replaces the current result with a new constructed result value in-place. The code must not - * be an error code. - */ - template <typename... Args> - void emplace(ResultCode success_code, Args&&... args) { - ASSERT(success_code.IsSuccess()); - if (!empty()) { - object.~T(); - } - new (&object) T(std::forward<Args>(args)...); - result_code = success_code; + [[nodiscard]] constexpr bool Succeeded() const { + return expected.has_value(); } - /// Returns true if the `ResultVal` contains an error code and no value. - [[nodiscard]] bool empty() const { - return result_code.IsError(); + [[nodiscard]] constexpr bool Failed() const { + return !expected.has_value(); } - /// Returns true if the `ResultVal` contains a return value. - [[nodiscard]] bool Succeeded() const { - return result_code.IsSuccess(); + [[nodiscard]] constexpr T* operator->() { + return std::addressof(expected.value()); } - /// Returns true if the `ResultVal` contains an error code and no value. - [[nodiscard]] bool Failed() const { - return empty(); + + [[nodiscard]] constexpr const T* operator->() const { + return std::addressof(expected.value()); } - [[nodiscard]] ResultCode Code() const { - return result_code; + [[nodiscard]] constexpr T& operator*() & { + return *expected; } - [[nodiscard]] const T& operator*() const { - return object; + [[nodiscard]] constexpr const T& operator*() const& { + return *expected; } - [[nodiscard]] T& operator*() { - return object; + + [[nodiscard]] constexpr T&& operator*() && { + return *expected; } - [[nodiscard]] const T* operator->() const { - return &object; + + [[nodiscard]] constexpr const T&& operator*() const&& { + return *expected; } - [[nodiscard]] T* operator->() { - return &object; + + [[nodiscard]] constexpr T& Unwrap() & { + ASSERT_MSG(Succeeded(), "Tried to Unwrap empty ResultVal"); + return expected.value(); } - /// Returns the value contained in this `ResultVal`, or the supplied default if it is missing. - template <typename U> - [[nodiscard]] T ValueOr(U&& value) const { - return !empty() ? object : std::move(value); + [[nodiscard]] constexpr const T& Unwrap() const& { + ASSERT_MSG(Succeeded(), "Tried to Unwrap empty ResultVal"); + return expected.value(); } - /// Asserts that the result succeeded and returns a reference to it. - [[nodiscard]] T& Unwrap() & { + [[nodiscard]] constexpr T&& Unwrap() && { ASSERT_MSG(Succeeded(), "Tried to Unwrap empty ResultVal"); - return **this; + return std::move(expected.value()); } - [[nodiscard]] T&& Unwrap() && { + [[nodiscard]] constexpr const T&& Unwrap() const&& { ASSERT_MSG(Succeeded(), "Tried to Unwrap empty ResultVal"); - return std::move(**this); + return std::move(expected.value()); } -private: - // A union is used to allocate the storage for the value, while allowing us to construct and - // destruct it at will. - union { - T object; - }; - ResultCode result_code; -}; + template <typename U> + [[nodiscard]] constexpr T ValueOr(U&& v) const& { + return expected.value_or(v); + } -/** - * This function is a helper used to construct `ResultVal`s. It receives the arguments to construct - * `T` with and creates a success `ResultVal` contained the constructed value. - */ -template <typename T, typename... Args> -[[nodiscard]] ResultVal<T> MakeResult(Args&&... args) { - return ResultVal<T>::WithCode(ResultSuccess, std::forward<Args>(args)...); -} + template <typename U> + [[nodiscard]] constexpr T ValueOr(U&& v) && { + return expected.value_or(v); + } -/** - * Deducible overload of MakeResult, allowing the template parameter to be ommited if you're just - * copy or move constructing. - */ -template <typename Arg> -[[nodiscard]] ResultVal<std::remove_cvref_t<Arg>> MakeResult(Arg&& arg) { - return ResultVal<std::remove_cvref_t<Arg>>::WithCode(ResultSuccess, std::forward<Arg>(arg)); -} +private: + // TODO: Replace this with std::expected once it is standardized in the STL. + Common::Expected<T, ResultCode> expected; +}; /** * Check for the success of `source` (which must evaluate to a ResultVal). If it succeeds, unwraps diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 689b36056..6e63e057e 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -16,7 +16,6 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/kernel.h" #include "core/hle/service/acc/acc.h" #include "core/hle/service/acc/acc_aa.h" @@ -26,9 +25,7 @@ #include "core/hle/service/acc/async_context.h" #include "core/hle/service/acc/errors.h" #include "core/hle/service/acc/profile_manager.h" -#include "core/hle/service/glue/arp.h" #include "core/hle/service/glue/glue_manager.h" -#include "core/hle/service/sm/sm.h" #include "core/loader/loader.h" namespace Service::Account { @@ -761,9 +758,8 @@ ResultCode Module::Interface::InitializeApplicationInfoBase() { // TODO(ogniK): This should be changed to reflect the target process for when we have multiple // processes emulated. As we don't actually have pid support we should assume we're just using // our own process - const auto& current_process = system.Kernel().CurrentProcess(); const auto launch_property = - system.GetARPManager().GetLaunchProperty(current_process->GetTitleID()); + system.GetARPManager().GetLaunchProperty(system.GetCurrentProcessProgramID()); if (launch_property.Failed()) { LOG_ERROR(Service_ACC, "Failed to get launch property"); @@ -807,7 +803,7 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx bool is_locked = false; if (res != Loader::ResultStatus::Success) { - const FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID(), + const FileSys::PatchManager pm{system.GetCurrentProcessProgramID(), system.GetFileSystemController(), system.GetContentProvider()}; const auto nacp_unique = pm.GetControlMetadata().first; @@ -826,6 +822,13 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx rb.Push(is_locked); } +void Module::Interface::InitializeApplicationInfoV2(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; Common::UUID user_id = rp.PopRaw<Common::UUID>(); diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index a83a480cd..f7e9bc4f8 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -33,6 +33,7 @@ public: void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); + void InitializeApplicationInfoV2(Kernel::HLERequestContext& ctx); void GetProfileEditor(Kernel::HLERequestContext& ctx); void ListQualifiedUsers(Kernel::HLERequestContext& ctx); void LoadOpenContext(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index ed241647c..df77c58f0 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ b/src/core/hle/service/acc/acc_u0.cpp @@ -34,6 +34,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module_, std::shared_ptr<ProfileManager> {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+ {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+ + {160, &ACC_U0::InitializeApplicationInfoV2, "InitializeApplicationInfoV2"}, }; // clang-format on diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index eccdcc20d..aee8d4f93 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -15,15 +15,12 @@ #include "core/file_sys/savedata_factory.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_transfer_memory.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applet_ae.h" #include "core/hle/service/am/applet_oe.h" #include "core/hle/service/am/applets/applet_profile_select.h" -#include "core/hle/service/am/applets/applet_software_keyboard.h" #include "core/hle/service/am/applets/applet_web_browser.h" #include "core/hle/service/am/applets/applets.h" #include "core/hle/service/am/idle.h" @@ -37,7 +34,6 @@ #include "core/hle/service/ns/ns.h" #include "core/hle/service/nvflinger/nvflinger.h" #include "core/hle/service/pm/pm.h" -#include "core/hle/service/set/set.h" #include "core/hle/service/sm/sm.h" #include "core/hle/service/vi/vi.h" #include "core/memory.h" @@ -801,15 +797,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& rb.Push(ResultSuccess); if (Settings::values.use_docked_mode.GetValue()) { - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); } else { - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); } } @@ -1432,7 +1424,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { u64 build_id{}; std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); - auto data = backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id}); + auto data = backend->GetLaunchParameter({system.GetCurrentProcessProgramID(), build_id}); if (data.has_value()) { IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); @@ -1484,7 +1476,7 @@ void IApplicationFunctions::EnsureSaveData(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called, uid={:016X}{:016X}", user_id[1], user_id[0]); FileSys::SaveDataAttribute attribute{}; - attribute.title_id = system.CurrentProcess()->GetTitleID(); + attribute.title_id = system.GetCurrentProcessProgramID(); attribute.user_id = user_id; attribute.type = FileSys::SaveDataType::SaveData; const auto res = system.GetFileSystemController().CreateSaveData( @@ -1514,7 +1506,7 @@ void IApplicationFunctions::GetDisplayVersion(Kernel::HLERequestContext& ctx) { std::array<u8, 0x10> version_string{}; const auto res = [this] { - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); const FileSys::PatchManager pm{title_id, system.GetFileSystemController(), system.GetContentProvider()}; @@ -1551,7 +1543,7 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) { u32 supported_languages = 0; const auto res = [this] { - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); const FileSys::PatchManager pm{title_id, system.GetFileSystemController(), system.GetContentProvider()}; @@ -1659,7 +1651,7 @@ void IApplicationFunctions::ExtendSaveData(Kernel::HLERequestContext& ctx) { static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size); system.GetFileSystemController().WriteSaveDataSize( - type, system.CurrentProcess()->GetTitleID(), user_id, {new_normal_size, new_journal_size}); + type, system.GetCurrentProcessProgramID(), user_id, {new_normal_size, new_journal_size}); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); @@ -1683,7 +1675,7 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) { user_id[0]); const auto size = system.GetFileSystemController().ReadSaveDataSize( - type, system.CurrentProcess()->GetTitleID(), user_id); + type, system.GetCurrentProcessProgramID(), user_id); IPC::ResponseBuilder rb{ctx, 6}; rb.Push(ResultSuccess); diff --git a/src/core/hle/service/am/applets/applet_error.cpp b/src/core/hle/service/am/applets/applet_error.cpp index 36a4aa9cd..a06c2b872 100644 --- a/src/core/hle/service/am/applets/applet_error.cpp +++ b/src/core/hle/service/am/applets/applet_error.cpp @@ -9,7 +9,6 @@ #include "common/string_util.h" #include "core/core.h" #include "core/frontend/applets/error.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applet_error.h" #include "core/reporter.h" @@ -167,7 +166,7 @@ void Error::Execute() { } const auto callback = [this] { DisplayCompleted(); }; - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); const auto& reporter{system.GetReporter()}; switch (mode) { diff --git a/src/core/hle/service/am/applets/applet_general_backend.cpp b/src/core/hle/service/am/applets/applet_general_backend.cpp index 0f413f9a0..2c6e9d83c 100644 --- a/src/core/hle/service/am/applets/applet_general_backend.cpp +++ b/src/core/hle/service/am/applets/applet_general_backend.cpp @@ -2,14 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <string_view> - #include "common/assert.h" #include "common/hex_util.h" #include "common/logging/log.h" #include "core/core.h" #include "core/frontend/applets/general_frontend.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/result.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applet_general_backend.h" @@ -189,7 +186,7 @@ void PhotoViewer::Execute() { const auto callback = [this] { ViewFinished(); }; switch (mode) { case PhotoViewerAppletMode::CurrentApp: - frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback); + frontend.ShowPhotosForApplication(system.GetCurrentProcessProgramID(), callback); break; case PhotoViewerAppletMode::AllApps: frontend.ShowAllPhotos(callback); diff --git a/src/core/hle/service/am/applets/applet_software_keyboard.cpp b/src/core/hle/service/am/applets/applet_software_keyboard.cpp index c89aa1bbf..f38f53f69 100644 --- a/src/core/hle/service/am/applets/applet_software_keyboard.cpp +++ b/src/core/hle/service/am/applets/applet_software_keyboard.cpp @@ -109,13 +109,18 @@ void SoftwareKeyboard::Execute() { ShowNormalKeyboard(); } -void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text) { +void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text, + bool confirmed) { if (complete) { return; } if (swkbd_config_common.use_text_check && result == SwkbdResult::Ok) { - SubmitForTextCheck(submitted_text); + if (confirmed) { + SubmitNormalOutputAndExit(result, submitted_text); + } else { + SubmitForTextCheck(submitted_text); + } } else { SubmitNormalOutputAndExit(result, submitted_text); } @@ -273,13 +278,21 @@ void SoftwareKeyboard::ProcessTextCheck() { std::memcpy(&swkbd_text_check, text_check_data.data(), sizeof(SwkbdTextCheck)); - std::u16string text_check_message = - swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure || - swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm - ? Common::UTF16StringFromFixedZeroTerminatedBuffer( - swkbd_text_check.text_check_message.data(), - swkbd_text_check.text_check_message.size()) - : u""; + std::u16string text_check_message = [this, &swkbd_text_check]() -> std::u16string { + if (swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure || + swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm) { + return swkbd_config_common.use_utf8 + ? Common::UTF8ToUTF16(Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>( + swkbd_text_check.text_check_message.data()), + swkbd_text_check.text_check_message.size() * sizeof(char16_t))) + : Common::UTF16StringFromFixedZeroTerminatedBuffer( + swkbd_text_check.text_check_message.data(), + swkbd_text_check.text_check_message.size()); + } else { + return u""; + } + }(); LOG_INFO(Service_AM, "\nTextCheckResult: {}\nTextCheckMessage: {}", GetTextCheckResultName(swkbd_text_check.text_check_result), @@ -583,11 +596,12 @@ void SoftwareKeyboard::InitializeFrontendKeyboard() { .disable_cancel_button{disable_cancel_button}, }; - frontend.InitializeKeyboard(false, std::move(initialize_parameters), - [this](SwkbdResult result, std::u16string submitted_text) { - SubmitTextNormal(result, submitted_text); - }, - {}); + frontend.InitializeKeyboard( + false, std::move(initialize_parameters), + [this](SwkbdResult result, std::u16string submitted_text, bool confirmed) { + SubmitTextNormal(result, submitted_text, confirmed); + }, + {}); } } diff --git a/src/core/hle/service/am/applets/applet_software_keyboard.h b/src/core/hle/service/am/applets/applet_software_keyboard.h index 9aef1bf11..a0fddd965 100644 --- a/src/core/hle/service/am/applets/applet_software_keyboard.h +++ b/src/core/hle/service/am/applets/applet_software_keyboard.h @@ -4,7 +4,6 @@ #pragma once -#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hle/result.h" #include "core/hle/service/am/applets/applet_software_keyboard_types.h" @@ -37,8 +36,9 @@ public: * * @param result SwkbdResult enum * @param submitted_text UTF-16 encoded string + * @param confirmed Whether the text has been confirmed after TextCheckResult::Confirm */ - void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text); + void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text, bool confirmed); /** * Submits the input text to the application. diff --git a/src/core/hle/service/am/applets/applet_web_browser.cpp b/src/core/hle/service/am/applets/applet_web_browser.cpp index 927eeefff..bb5cb61be 100644 --- a/src/core/hle/service/am/applets/applet_web_browser.cpp +++ b/src/core/hle/service/am/applets/applet_web_browser.cpp @@ -18,7 +18,6 @@ #include "core/file_sys/system_archive/system_archive.h" #include "core/file_sys/vfs_vector.h" #include "core/frontend/applets/web_browser.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/result.h" #include "core/hle/service/am/am.h" #include "core/hle/service/am/applets/applet_web_browser.h" @@ -395,7 +394,7 @@ void WebBrowser::InitializeOffline() { switch (document_kind) { case DocumentKind::OfflineHtmlPage: default: - title_id = system.CurrentProcess()->GetTitleID(); + title_id = system.GetCurrentProcessProgramID(); nca_type = FileSys::ContentRecordType::HtmlDocument; additional_paths = "html-document"; break; diff --git a/src/core/hle/service/am/applets/applet_web_browser.h b/src/core/hle/service/am/applets/applet_web_browser.h index 4f9e81b79..b3364ee06 100644 --- a/src/core/hle/service/am/applets/applet_web_browser.h +++ b/src/core/hle/service/am/applets/applet_web_browser.h @@ -7,7 +7,6 @@ #include <filesystem> #include <optional> -#include "common/common_funcs.h" #include "common/common_types.h" #include "core/file_sys/vfs_types.h" #include "core/hle/result.h" diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 4c54066c6..3c83717b5 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -17,8 +17,6 @@ #include "core/file_sys/registered_cache.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/k_process.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/aoc/aoc_u.h" #include "core/loader/loader.h" @@ -124,9 +122,14 @@ AOC_U::AOC_U(Core::System& system_) {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, {9, nullptr, "GetAddOnContentLostErrorCode"}, {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"}, + {11, &AOC_U::NotifyMountAddOnContent, "NotifyMountAddOnContent"}, + {12, &AOC_U::NotifyUnmountAddOnContent, "NotifyUnmountAddOnContent"}, + {13, nullptr, "IsAddOnContentMountedForDebug"}, + {50, &AOC_U::CheckAddOnContentMountStatus, "CheckAddOnContentMountStatus"}, {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"}, {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"}, {110, nullptr, "CreateContentsServiceManager"}, + {200, nullptr, "SetRequiredAddOnContentsOnContentsAvailabilityTransition"}, }; // clang-format on @@ -153,7 +156,7 @@ void AOC_U::CountAddOnContent(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); - const auto current = system.CurrentProcess()->GetTitleID(); + const auto current = system.GetCurrentProcessProgramID(); const auto& disabled = Settings::values.disabled_addons[current]; if (std::find(disabled.begin(), disabled.end(), "DLC") != disabled.end()) { @@ -180,7 +183,7 @@ void AOC_U::ListAddOnContent(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AOC, "called with offset={}, count={}, process_id={}", offset, count, process_id); - const auto current = system.CurrentProcess()->GetTitleID(); + const auto current = system.GetCurrentProcessProgramID(); std::vector<u32> out; const auto& disabled = Settings::values.disabled_addons[current]; @@ -226,7 +229,7 @@ void AOC_U::GetAddOnContentBaseId(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); const FileSys::PatchManager pm{title_id, system.GetFileSystemController(), system.GetContentProvider()}; @@ -272,6 +275,27 @@ void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestConte rb.PushCopyObjects(aoc_change_event->GetReadableEvent()); } +void AOC_U::NotifyMountAddOnContent(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + +void AOC_U::NotifyUnmountAddOnContent(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + +void AOC_U::CheckAddOnContentMountStatus(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_AOC, "(STUBBED) called"); diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h index 31d645be8..4b5f7c5f2 100644 --- a/src/core/hle/service/aoc/aoc_u.h +++ b/src/core/hle/service/aoc/aoc_u.h @@ -29,6 +29,9 @@ private: void PrepareAddOnContent(Kernel::HLERequestContext& ctx); void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx); void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx); + void NotifyMountAddOnContent(Kernel::HLERequestContext& ctx); + void NotifyUnmountAddOnContent(Kernel::HLERequestContext& ctx); + void CheckAddOnContentMountStatus(Kernel::HLERequestContext& ctx); void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx); void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp index f5ebfe8d6..243ea15b8 100644 --- a/src/core/hle/service/apm/apm.cpp +++ b/src/core/hle/service/apm/apm.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "core/core.h" -#include "core/hle/ipc_helpers.h" #include "core/hle/service/apm/apm.h" #include "core/hle/service/apm/apm_interface.h" diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 81adbfe09..affa7971c 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -14,7 +14,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/audio/audout_u.h" #include "core/hle/service/audio/errors.h" #include "core/hle/service/kernel_helpers.h" diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index cdb2a9521..f45e5cecc 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -2,10 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> #include <array> #include <memory> -#include <string_view> #include "audio_core/audio_renderer.h" #include "common/alignment.h" @@ -16,7 +14,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/audio/audren_u.h" #include "core/hle/service/audio/errors.h" diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h index 749e046c7..59c6d4740 100644 --- a/src/core/hle/service/bcat/backend/backend.h +++ b/src/core/hle/service/bcat/backend/backend.h @@ -7,7 +7,6 @@ #include <functional> #include <optional> #include <string> -#include <string_view> #include "common/common_types.h" #include "core/file_sys/vfs_types.h" diff --git a/src/core/hle/service/bcat/bcat_module.cpp b/src/core/hle/service/bcat/bcat_module.cpp index 27e9b8df8..500e7e52d 100644 --- a/src/core/hle/service/bcat/bcat_module.cpp +++ b/src/core/hle/service/bcat/bcat_module.cpp @@ -11,9 +11,7 @@ #include "core/core.h" #include "core/file_sys/vfs.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" -#include "core/hle/kernel/k_writable_event.h" #include "core/hle/service/bcat/backend/backend.h" #include "core/hle/service/bcat/bcat.h" #include "core/hle/service/bcat/bcat_module.h" @@ -179,7 +177,7 @@ private: void RequestSyncDeliveryCache(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_BCAT, "called"); - backend.Synchronize({system.CurrentProcess()->GetTitleID(), + backend.Synchronize({system.GetCurrentProcessProgramID(), GetCurrentBuildID(system.GetCurrentProcessBuildID())}, GetProgressBackend(SyncType::Normal)); @@ -196,7 +194,7 @@ private: LOG_DEBUG(Service_BCAT, "called, name={}", name); - backend.SynchronizeDirectory({system.CurrentProcess()->GetTitleID(), + backend.SynchronizeDirectory({system.GetCurrentProcessProgramID(), GetCurrentBuildID(system.GetCurrentProcessBuildID())}, name, GetProgressBackend(SyncType::Directory)); @@ -557,7 +555,7 @@ private: void Module::Interface::CreateDeliveryCacheStorageService(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_BCAT, "called"); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); rb.PushIpcInterface<IDeliveryCacheStorageService>(system, fsc.GetBCATDirectory(title_id)); diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 088a1a18a..0787f43f4 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp @@ -6,7 +6,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/btdrv/btdrv.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp index 7aabacc19..d337fd317 100644 --- a/src/core/hle/service/btm/btm.cpp +++ b/src/core/hle/service/btm/btm.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/btm/btm.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp index 2c2619a7d..f84506af0 100644 --- a/src/core/hle/service/fatal/fatal.cpp +++ b/src/core/hle/service/fatal/fatal.cpp @@ -11,7 +11,6 @@ #include "common/swap.h" #include "core/core.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/fatal/fatal.h" #include "core/hle/service/fatal/fatal_p.h" #include "core/hle/service/fatal/fatal_u.h" @@ -66,7 +65,7 @@ enum class FatalType : u32 { static void GenerateErrorReport(Core::System& system, ResultCode error_code, const FatalInfo& info) { - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); std::string crash_report = fmt::format( "Yuzu {}-{} crash report\n" "Title ID: {:016x}\n" diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index f8f9e32f7..3703ca4c6 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -13,7 +13,6 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/errors.h" #include "core/file_sys/mode.h" -#include "core/file_sys/partition_filesystem.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/registered_cache.h" #include "core/file_sys/romfs_factory.h" @@ -21,7 +20,6 @@ #include "core/file_sys/sdmc_factory.h" #include "core/file_sys/vfs.h" #include "core/file_sys/vfs_offset.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/fsp_ldr.h" #include "core/hle/service/filesystem/fsp_pr.h" @@ -226,11 +224,10 @@ ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std:: } if (mode == FileSys::Mode::Append) { - return MakeResult<FileSys::VirtualFile>( - std::make_shared<FileSys::OffsetVfsFile>(file, 0, file->GetSize())); + return std::make_shared<FileSys::OffsetVfsFile>(file, 0, file->GetSize()); } - return MakeResult<FileSys::VirtualFile>(file); + return file; } ResultVal<FileSys::VirtualDir> VfsDirectoryServiceWrapper::OpenDirectory(const std::string& path_) { @@ -240,7 +237,7 @@ ResultVal<FileSys::VirtualDir> VfsDirectoryServiceWrapper::OpenDirectory(const s // TODO(DarkLordZach): Find a better error code for this return FileSys::ERROR_PATH_NOT_FOUND; } - return MakeResult(dir); + return dir; } ResultVal<FileSys::EntryType> VfsDirectoryServiceWrapper::GetEntryType( @@ -252,12 +249,12 @@ ResultVal<FileSys::EntryType> VfsDirectoryServiceWrapper::GetEntryType( auto filename = Common::FS::GetFilename(path); // TODO(Subv): Some games use the '/' path, find out what this means. if (filename.empty()) - return MakeResult(FileSys::EntryType::Directory); + return FileSys::EntryType::Directory; if (dir->GetFile(filename) != nullptr) - return MakeResult(FileSys::EntryType::File); + return FileSys::EntryType::File; if (dir->GetSubdirectory(filename) != nullptr) - return MakeResult(FileSys::EntryType::Directory); + return FileSys::EntryType::Directory; return FileSys::ERROR_PATH_NOT_FOUND; } @@ -270,7 +267,7 @@ ResultVal<FileSys::FileTimeStampRaw> VfsDirectoryServiceWrapper::GetFileTimeStam if (GetEntryType(path).Failed()) { return FileSys::ERROR_PATH_NOT_FOUND; } - return MakeResult(dir->GetFileTimeStamp(Common::FS::GetFilename(path))); + return dir->GetFileTimeStamp(Common::FS::GetFilename(path)); } FileSystemController::FileSystemController(Core::System& system_) : system{system_} {} @@ -322,7 +319,7 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenRomFSCurrentProcess() return ResultUnknown; } - return romfs_factory->OpenCurrentProcess(system.CurrentProcess()->GetTitleID()); + return romfs_factory->OpenCurrentProcess(system.GetCurrentProcessProgramID()); } ResultVal<FileSys::VirtualFile> FileSystemController::OpenPatchedRomFS( @@ -395,7 +392,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenSaveDataSpace( return FileSys::ERROR_ENTITY_NOT_FOUND; } - return MakeResult(save_data_factory->GetSaveDataSpaceDirectory(space)); + return save_data_factory->GetSaveDataSpaceDirectory(space); } ResultVal<FileSys::VirtualDir> FileSystemController::OpenSDMC() const { @@ -421,7 +418,7 @@ ResultVal<FileSys::VirtualDir> FileSystemController::OpenBISPartition( return FileSys::ERROR_INVALID_ARGUMENT; } - return MakeResult<FileSys::VirtualDir>(std::move(part)); + return part; } ResultVal<FileSys::VirtualFile> FileSystemController::OpenBISPartitionStorage( @@ -437,7 +434,7 @@ ResultVal<FileSys::VirtualFile> FileSystemController::OpenBISPartitionStorage( return FileSys::ERROR_INVALID_ARGUMENT; } - return MakeResult<FileSys::VirtualFile>(std::move(part)); + return part; } u64 FileSystemController::GetFreeSpaceSize(FileSys::StorageId id) const { @@ -507,7 +504,7 @@ FileSys::SaveDataSize FileSystemController::ReadSaveDataSize(FileSys::SaveDataTy const auto res = system.GetAppLoader().ReadControlData(nacp); if (res != Loader::ResultStatus::Success) { - const FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID(), + const FileSys::PatchManager pm{system.GetCurrentProcessProgramID(), system.GetFileSystemController(), system.GetContentProvider()}; const auto metadata = pm.GetControlMetadata(); diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 50c788dd6..3501bc1a4 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -26,7 +26,6 @@ #include "core/file_sys/system_archive/system_archive.h" #include "core/file_sys/vfs.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/fsp_srv.h" #include "core/reporter.h" @@ -1035,7 +1034,7 @@ void FSP_SRV::OpenDataStorageWithProgramIndex(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_FS, "called, program_index={}", program_index); auto patched_romfs = fsc.OpenPatchedRomFSWithProgramIndex( - system.CurrentProcess()->GetTitleID(), program_index, FileSys::ContentRecordType::Program); + system.GetCurrentProcessProgramID(), program_index, FileSys::ContentRecordType::Program); if (patched_romfs.Failed()) { // TODO: Find the right error code to use here diff --git a/src/core/hle/service/glue/arp.cpp b/src/core/hle/service/glue/arp.cpp index 70cd63c6b..2feead2aa 100644 --- a/src/core/hle/service/glue/arp.cpp +++ b/src/core/hle/service/glue/arp.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" #include "core/core.h" -#include "core/file_sys/control_metadata.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/kernel.h" @@ -27,7 +26,7 @@ std::optional<u64> GetTitleIDForProcessID(const Core::System& system, u64 proces return std::nullopt; } - return (*iter)->GetTitleID(); + return (*iter)->GetProgramID(); } } // Anonymous namespace diff --git a/src/core/hle/service/glue/glue_manager.cpp b/src/core/hle/service/glue/glue_manager.cpp index aa9d48c0c..48e133b48 100644 --- a/src/core/hle/service/glue/glue_manager.cpp +++ b/src/core/hle/service/glue/glue_manager.cpp @@ -26,7 +26,7 @@ ResultVal<ApplicationLaunchProperty> ARPManager::GetLaunchProperty(u64 title_id) return ERR_NOT_REGISTERED; } - return MakeResult<ApplicationLaunchProperty>(iter->second.launch); + return iter->second.launch; } ResultVal<std::vector<u8>> ARPManager::GetControlProperty(u64 title_id) const { @@ -39,7 +39,7 @@ ResultVal<std::vector<u8>> ARPManager::GetControlProperty(u64 title_id) const { return ERR_NOT_REGISTERED; } - return MakeResult<std::vector<u8>>(iter->second.control); + return iter->second.control; } ResultCode ARPManager::Register(u64 title_id, ApplicationLaunchProperty launch, diff --git a/src/core/hle/service/glue/glue_manager.h b/src/core/hle/service/glue/glue_manager.h index a7f5ce3ee..4bc5297c6 100644 --- a/src/core/hle/service/glue/glue_manager.h +++ b/src/core/hle/service/glue/glue_manager.h @@ -7,7 +7,6 @@ #include <map> #include <vector> #include "common/common_types.h" -#include "core/file_sys/control_metadata.h" #include "core/file_sys/romfs_factory.h" #include "core/hle/result.h" diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index b7f551e40..196876810 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -16,7 +16,6 @@ #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_writable_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/kernel_helpers.h" diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 043320d50..10c64d41a 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -8,13 +8,11 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/frontend/emu_window.h" #include "core/frontend/input.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_transfer_memory.h" -#include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" #include "core/hle/service/hid/errors.h" #include "core/hle/service/hid/hid.h" diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 4a92c6234..8812b8ecb 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/swap.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/ipc_helpers.h" diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 24b7e4435..32eff3b2a 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -12,7 +12,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_page_table.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_system_control.h" #include "core/hle/kernel/svc_results.h" #include "core/hle/service/ldr/ldr.h" @@ -247,7 +246,7 @@ public: return; } - if (system.CurrentProcess()->GetTitleID() != header.application_id) { + if (system.GetCurrentProcessProgramID() != header.application_id) { LOG_ERROR(Service_LDR, "Attempting to load NRR with title ID other than current process. (actual " "{:016X})!", @@ -335,7 +334,7 @@ public: CASCADE_CODE(result); if (ValidateRegionForMap(page_table, addr, size)) { - return MakeResult<VAddr>(addr); + return addr; } } @@ -371,7 +370,7 @@ public: } if (ValidateRegionForMap(page_table, addr, size)) { - return MakeResult<VAddr>(addr); + return addr; } } diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp index 794504314..e40383134 100644 --- a/src/core/hle/service/lm/lm.cpp +++ b/src/core/hle/service/lm/lm.cpp @@ -2,19 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <sstream> #include <string> #include <optional> #include <unordered_map> #include <boost/container_hash/hash.hpp> #include "common/logging/log.h" -#include "common/scope_exit.h" #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/service/lm/lm.h" #include "core/hle/service/service.h" -#include "core/memory.h" namespace Service::LM { enum class LogSeverity : u8 { diff --git a/src/core/hle/service/mii/mii_manager.cpp b/src/core/hle/service/mii/mii_manager.cpp index 4fef2aea4..ca4ed35bb 100644 --- a/src/core/hle/service/mii/mii_manager.cpp +++ b/src/core/hle/service/mii/mii_manager.cpp @@ -443,14 +443,14 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_ std::vector<MiiInfoElement> result; if ((source_flag & SourceFlag::Default) == SourceFlag::None) { - return MakeResult(std::move(result)); + return result; } for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) { result.emplace_back(BuildDefault(index), Source::Default); } - return MakeResult(std::move(result)); + return result; } ResultCode MiiManager::GetIndex([[maybe_unused]] const MiiInfo& info, u32& index) { diff --git a/src/core/hle/service/mii/raw_data.h b/src/core/hle/service/mii/raw_data.h index a0d2b9d3a..bd90c2162 100644 --- a/src/core/hle/service/mii/raw_data.h +++ b/src/core/hle/service/mii/raw_data.h @@ -6,7 +6,6 @@ #include <array> -#include "common/common_types.h" #include "core/hle/service/mii/mii_manager.h" namespace Service::Mii::RawData { diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp index 6791f20a5..693ffc71a 100644 --- a/src/core/hle/service/nfp/nfp.cpp +++ b/src/core/hle/service/nfp/nfp.cpp @@ -9,8 +9,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/lock.h" #include "core/hle/service/nfp/nfp.h" #include "core/hle/service/nfp/nfp_user.h" diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index f13dc8b0d..a253dd066 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/settings.h" #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp index 30fb060b8..196f274e1 100644 --- a/src/core/hle/service/nim/nim.cpp +++ b/src/core/hle/service/nim/nim.cpp @@ -7,7 +7,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/nim/nim.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/ns/language.h b/src/core/hle/service/ns/language.h index d84c3f277..2cc8e4806 100644 --- a/src/core/hle/service/ns/language.h +++ b/src/core/hle/service/ns/language.h @@ -5,7 +5,6 @@ #pragma once #include <optional> -#include <string> #include "common/common_types.h" namespace Service::Set { @@ -43,4 +42,4 @@ constexpr u32 GetSupportedLanguageFlag(const ApplicationLanguage lang) { const ApplicationLanguagePriorityList* GetApplicationLanguagePriorityList(ApplicationLanguage lang); std::optional<ApplicationLanguage> ConvertToApplicationLanguage(Set::LanguageCode language_code); std::optional<Set::LanguageCode> ConvertToLanguageCode(ApplicationLanguage lang); -} // namespace Service::NS
\ No newline at end of file +} // namespace Service::NS diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp index 931b48f72..64ffc8572 100644 --- a/src/core/hle/service/ns/ns.cpp +++ b/src/core/hle/service/ns/ns.cpp @@ -414,7 +414,7 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage( for (const auto lang : *priority_list) { const auto supported_flag = GetSupportedLanguageFlag(lang); if (supported_languages == 0 || (supported_languages & supported_flag) == supported_flag) { - return MakeResult(static_cast<u8>(lang)); + return static_cast<u8>(lang); } } @@ -448,7 +448,7 @@ ResultVal<u64> IApplicationManagerInterface::ConvertApplicationLanguageToLanguag return ERR_APPLICATION_LANGUAGE_NOT_FOUND; } - return MakeResult(static_cast<u64>(*language_code)); + return static_cast<u64>(*language_code); } IApplicationVersionInterface::IApplicationVersionInterface(Core::System& system_) diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 5b73a5a34..3d874243a 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -5,11 +5,8 @@ #pragma once #include <vector> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/swap.h" #include "core/hle/service/nvdrv/nvdata.h" -#include "core/hle/service/service.h" namespace Core { class System; diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 4ee8c5733..68f1e9060 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -10,7 +10,6 @@ #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/perf_stats.h" #include "video_core/gpu.h" -#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 7dc41d875..85170cdb3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -10,10 +10,8 @@ #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" #include "core/hle/service/nvdrv/devices/nvmap.h" -#include "core/memory.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index d8b684f4f..05b4e2151 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -7,7 +7,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" namespace Service::Nvidia::Devices { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 54ac105d5..0a043e386 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -10,7 +10,6 @@ #include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/memory.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Service::Nvidia::Devices { namespace { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 6c1edce33..0d7d4ad03 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" -#include "video_core/memory_manager.h" #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index ae4199b79..351625c17 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -4,7 +4,6 @@ #pragma once -#include <map> #include <vector> #include "common/common_types.h" #include "common/swap.h" diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 21d101e8a..eac4dd530 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h" -#include "video_core/memory_manager.h" #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index ff405099a..aa7e47cbf 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_writable_event.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index d61fb73dc..c16babe14 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -7,9 +7,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_readable_event.h" -#include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/k_writable_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvdrv/nvdrv_interface.h" diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 78de3f354..7b7baeaea 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -8,7 +8,6 @@ #include <list> #include <mutex> #include <optional> -#include <vector> #include "common/common_funcs.h" #include "common/math_util.h" diff --git a/src/core/hle/service/pctl/pctl_module.cpp b/src/core/hle/service/pctl/pctl_module.cpp index 6949fcf3b..240776101 100644 --- a/src/core/hle/service/pctl/pctl_module.cpp +++ b/src/core/hle/service/pctl/pctl_module.cpp @@ -7,7 +7,6 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/pctl/pctl.h" #include "core/hle/service/pctl/pctl_module.h" @@ -45,7 +44,7 @@ public: {1014, nullptr, "ConfirmPlayableApplicationVideoOld"}, {1015, nullptr, "ConfirmPlayableApplicationVideo"}, {1016, nullptr, "ConfirmShowNewsPermission"}, - {1017, nullptr, "EndFreeCommunication"}, + {1017, &IParentalControlService::EndFreeCommunication, "EndFreeCommunication"}, {1018, &IParentalControlService::IsFreeCommunicationAvailable, "IsFreeCommunicationAvailable"}, {1031, &IParentalControlService::IsRestrictionEnabled, "IsRestrictionEnabled"}, {1032, nullptr, "GetSafetyLevel"}, @@ -189,7 +188,7 @@ private: // TODO(ogniK): Recovery flag initialization for pctl:r - const auto tid = system.CurrentProcess()->GetTitleID(); + const auto tid = system.GetCurrentProcessProgramID(); if (tid != 0) { const FileSys::PatchManager pm{tid, system.GetFileSystemController(), system.GetContentProvider()}; @@ -237,6 +236,13 @@ private: rb.Push(ResultSuccess); } + void EndFreeCommunication(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_PCTL, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } + void IsFreeCommunicationAvailable(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_PCTL, "(STUBBED) called"); diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index f675740b4..88fc5b5cc 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp @@ -101,7 +101,7 @@ private: const auto process = SearchProcessList(kernel.GetProcessList(), [title_id](const auto& proc) { - return proc->GetTitleID() == title_id; + return proc->GetProgramID() == title_id; }); if (!process.has_value()) { @@ -152,7 +152,7 @@ private: IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); - rb.Push((*process)->GetTitleID()); + rb.Push((*process)->GetProgramID()); } const std::vector<Kernel::KProcess*>& process_list; diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 32db6834c..5c8a44688 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/acc/profile_manager.h" #include "core/hle/service/prepo/prepo.h" #include "core/hle/service/service.h" @@ -73,7 +72,7 @@ private: Type, process_id, data1.size(), data2.size()); const auto& reporter{system.GetReporter()}; - reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2}, + reporter.SavePlayReport(Type, system.GetCurrentProcessProgramID(), {data1, data2}, process_id); IPC::ResponseBuilder rb{ctx, 2}; @@ -101,7 +100,7 @@ private: Type, user_id[1], user_id[0], process_id, data1.size(), data2.size()); const auto& reporter{system.GetReporter()}; - reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2}, + reporter.SavePlayReport(Type, system.GetCurrentProcessProgramID(), {data1, data2}, process_id, user_id); IPC::ResponseBuilder rb{ctx, 2}; diff --git a/src/core/hle/service/ptm/psm.cpp b/src/core/hle/service/ptm/psm.cpp index 22ff5269c..5d248f671 100644 --- a/src/core/hle/service/ptm/psm.cpp +++ b/src/core/hle/service/ptm/psm.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_event.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/ptm/psm.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 065133166..f54e6fe56 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -2,19 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> #include <fmt/format.h> #include "common/assert.h" #include "common/logging/log.h" #include "common/settings.h" -#include "common/string_util.h" #include "core/core.h" #include "core/hle/ipc.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_client_port.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_server_port.h" -#include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" #include "core/hle/service/acc/acc.h" #include "core/hle/service/am/am.h" diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index ae4dc4a75..eaa172595 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -12,8 +12,6 @@ #include "core/hle/kernel/k_port.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_server_port.h" -#include "core/hle/kernel/k_server_session.h" -#include "core/hle/kernel/k_session.h" #include "core/hle/result.h" #include "core/hle/service/sm/sm.h" #include "core/hle/service/sm/sm_controller.h" @@ -87,7 +85,7 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name auto handler = it->second; port->GetServerPort().SetSessionHandler(std::move(handler)); - return MakeResult(port); + return port; } /** @@ -165,7 +163,7 @@ ResultVal<Kernel::KClientSession*> SM::GetServiceImpl(Kernel::HLERequestContext& LOG_DEBUG(Service_SM, "called service={} -> session={}", name, session->GetId()); - return MakeResult(session); + return session; } void SM::RegisterService(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h index 068c78588..021eb51b4 100644 --- a/src/core/hle/service/sm/sm.h +++ b/src/core/hle/service/sm/sm.h @@ -6,7 +6,6 @@ #include <memory> #include <string> -#include <type_traits> #include <unordered_map> #include "common/concepts.h" diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp index b5fbc4569..09f9ecee1 100644 --- a/src/core/hle/service/sm/sm_controller.cpp +++ b/src/core/hle/service/sm/sm_controller.cpp @@ -7,10 +7,8 @@ #include "core/core.h" #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_client_port.h" -#include "core/hle/kernel/k_client_session.h" #include "core/hle/kernel/k_port.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" -#include "core/hle/kernel/k_server_port.h" #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/k_session.h" #include "core/hle/service/sm/sm_controller.h" diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index b9e765f1d..f83272633 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -4,7 +4,6 @@ #include <array> #include <memory> -#include <string> #include <utility> #include <vector> diff --git a/src/core/hle/service/spl/spl_module.cpp b/src/core/hle/service/spl/spl_module.cpp index ed4c06260..10f7d1461 100644 --- a/src/core/hle/service/spl/spl_module.cpp +++ b/src/core/hle/service/spl/spl_module.cpp @@ -120,40 +120,40 @@ ResultVal<u64> Module::Interface::GetConfigImpl(ConfigItem config_item) const { return ResultSecureMonitorNotImplemented; case ConfigItem::ExosphereApiVersion: // Get information about the current exosphere version. - return MakeResult((u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MAJOR} << 56) | - (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MINOR} << 48) | - (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MICRO} << 40) | - (static_cast<u64>(HLE::ApiVersion::GetTargetFirmware()))); + return (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MAJOR} << 56) | + (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MINOR} << 48) | + (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MICRO} << 40) | + (static_cast<u64>(HLE::ApiVersion::GetTargetFirmware())); case ConfigItem::ExosphereNeedsReboot: // We are executing, so we aren't in the process of rebooting. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereNeedsShutdown: // We are executing, so we aren't in the process of shutting down. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereGitCommitHash: // Get information about the current exosphere git commit hash. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereHasRcmBugPatch: // Get information about whether this unit has the RCM bug patched. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereBlankProdInfo: // Get whether this unit should simulate a "blanked" PRODINFO. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereAllowCalWrites: // Get whether this unit should allow writing to the calibration partition. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereEmummcType: // Get what kind of emummc this unit has active. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExospherePayloadAddress: // Gets the physical address of the reboot payload buffer, if one exists. return ResultSecureMonitorNotInitialized; case ConfigItem::ExosphereLogConfiguration: // Get the log configuration. - return MakeResult(u64{0}); + return u64{0}; case ConfigItem::ExosphereForceEnableUsb30: // Get whether usb 3.0 should be force-enabled. - return MakeResult(u64{0}); + return u64{0}; default: return ResultSecureMonitorInvalidArgument; } diff --git a/src/core/hle/service/time/local_system_clock_context_writer.h b/src/core/hle/service/time/local_system_clock_context_writer.h index 6be617392..ac6c7b4b1 100644 --- a/src/core/hle/service/time/local_system_clock_context_writer.h +++ b/src/core/hle/service/time/local_system_clock_context_writer.h @@ -4,7 +4,6 @@ #pragma once -#include "core/hle/service/time/errors.h" #include "core/hle/service/time/system_clock_context_update_callback.h" #include "core/hle/service/time/time_sharedmemory.h" diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 59a272f4a..a1ffdd524 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -4,7 +4,6 @@ #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" #include "core/hle/service/time/standard_steady_clock_core.h" diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 8baaa2a6a..47d4ab980 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -4,7 +4,6 @@ #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" #include "core/hle/service/time/tick_based_steady_clock_core.h" diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index d84a111c2..4d8823b5a 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -5,10 +5,8 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" #include "core/hle/service/time/time.h" #include "core/hle/service/time/time_interface.h" diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp index 9c4c960ef..c1e4e6cce 100644 --- a/src/core/hle/service/time/time_manager.cpp +++ b/src/core/hle/service/time/time_manager.cpp @@ -8,8 +8,10 @@ #include "common/settings.h" #include "common/time_zone.h" #include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h" +#include "core/hle/service/time/ephemeral_network_system_clock_core.h" #include "core/hle/service/time/local_system_clock_context_writer.h" #include "core/hle/service/time/network_system_clock_context_writer.h" +#include "core/hle/service/time/tick_based_steady_clock_core.h" #include "core/hle/service/time/time_manager.h" namespace Service::Time { diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h index 3af868d87..2404067c0 100644 --- a/src/core/hle/service/time/time_manager.h +++ b/src/core/hle/service/time/time_manager.h @@ -5,15 +5,12 @@ #pragma once #include "common/common_types.h" -#include "common/time_zone.h" #include "core/file_sys/vfs_types.h" #include "core/hle/service/time/clock_types.h" -#include "core/hle/service/time/ephemeral_network_system_clock_core.h" #include "core/hle/service/time/standard_local_system_clock_core.h" #include "core/hle/service/time/standard_network_system_clock_core.h" #include "core/hle/service/time/standard_steady_clock_core.h" #include "core/hle/service/time/standard_user_system_clock_core.h" -#include "core/hle/service/time/tick_based_steady_clock_core.h" #include "core/hle/service/time/time_sharedmemory.h" #include "core/hle/service/time/time_zone_content_manager.h" diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 176ad0eee..ed9f75ed6 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -4,7 +4,6 @@ #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" #include "core/hle/kernel/kernel.h" #include "core/hle/service/time/clock_types.h" diff --git a/src/core/hle/service/time/time_sharedmemory.h b/src/core/hle/service/time/time_sharedmemory.h index d471b5d18..9307ea795 100644 --- a/src/core/hle/service/time/time_sharedmemory.h +++ b/src/core/hle/service/time/time_sharedmemory.h @@ -7,7 +7,6 @@ #include "common/common_types.h" #include "common/uuid.h" #include "core/hle/kernel/k_shared_memory.h" -#include "core/hle/kernel/k_thread.h" #include "core/hle/service/time/clock_types.h" namespace Service::Time { diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp index 6da893790..2989cee5e 100644 --- a/src/core/hle/service/time/time_zone_manager.cpp +++ b/src/core/hle/service/time/time_zone_manager.cpp @@ -9,8 +9,6 @@ #include "core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" #include "core/file_sys/registered_cache.h" -#include "core/file_sys/romfs.h" -#include "core/file_sys/system_archive/system_archive.h" #include "core/hle/service/time/time_zone_manager.h" namespace Service::Time::TimeZone { diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 439e7e472..75ee3e5e4 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -21,9 +21,7 @@ #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/k_writable_event.h" #include "core/hle/service/nvdrv/nvdata.h" -#include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "core/hle/service/nvflinger/nvflinger.h" #include "core/hle/service/service.h" @@ -543,11 +541,8 @@ private: switch (transaction) { case TransactionId::Connect: { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; - IGBPConnectResponseParcel response{ - static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) * - Settings::values.resolution_factor.GetValue()), - static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * - Settings::values.resolution_factor.GetValue())}; + IGBPConnectResponseParcel response{static_cast<u32>(DisplayResolution::UndockedWidth), + static_cast<u32>(DisplayResolution::UndockedHeight)}; buffer_queue.Connect(); @@ -777,15 +772,11 @@ private: rb.Push(ResultSuccess); if (Settings::values.use_docked_mode.GetValue()) { - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); } else { - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); + rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); } rb.PushRaw<float>(60.0f); // This wouldn't seem to be correct for 30 fps games. @@ -1065,10 +1056,8 @@ private: // This only returns the fixed values of 1280x720 and makes no distinguishing // between docked and undocked dimensions. We take the liberty of applying // the resolution scaling factor here. - rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) * - static_cast<u32>(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth)); + rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight)); } void SetLayerScalingMode(Kernel::HLERequestContext& ctx) { @@ -1101,8 +1090,6 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called"); DisplayInfo display_info; - display_info.width *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); - display_info.height *= static_cast<u64>(Settings::values.resolution_factor.GetValue()); ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); @@ -1284,15 +1271,15 @@ private: static ResultVal<ConvertedScaleMode> ConvertScalingModeImpl(NintendoScaleMode mode) { switch (mode) { case NintendoScaleMode::None: - return MakeResult(ConvertedScaleMode::None); + return ConvertedScaleMode::None; case NintendoScaleMode::Freeze: - return MakeResult(ConvertedScaleMode::Freeze); + return ConvertedScaleMode::Freeze; case NintendoScaleMode::ScaleToWindow: - return MakeResult(ConvertedScaleMode::ScaleToWindow); + return ConvertedScaleMode::ScaleToWindow; case NintendoScaleMode::ScaleAndCrop: - return MakeResult(ConvertedScaleMode::ScaleAndCrop); + return ConvertedScaleMode::ScaleAndCrop; case NintendoScaleMode::PreserveAspectRatio: - return MakeResult(ConvertedScaleMode::PreserveAspectRatio); + return ConvertedScaleMode::PreserveAspectRatio; default: LOG_ERROR(Service_VI, "Invalid scaling mode specified, mode={}", mode); return ERR_OPERATION_FAILED; diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index a19bb220a..1b5aca65d 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -2,9 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cinttypes> #include <cstring> -#include "common/common_funcs.h" #include "common/logging/log.h" #include "core/core.h" #include "core/file_sys/content_archive.h" @@ -13,7 +11,6 @@ #include "core/file_sys/romfs_factory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" -#include "core/hle/kernel/kernel.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/deconstructed_rom_directory.h" #include "core/loader/nso.h" diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 3d9276f15..d0250bdb4 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -4,7 +4,6 @@ #include <cstring> #include <memory> -#include <string> #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index 890299a20..bff51ec17 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -4,8 +4,6 @@ #pragma once -#include <string> -#include "common/common_types.h" #include "core/loader/loader.h" namespace Core { diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp index aceb66414..3375dab7c 100644 --- a/src/core/loader/nax.cpp +++ b/src/core/loader/nax.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/logging/log.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/romfs.h" #include "core/file_sys/xts_archive.h" diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index aa51b0daa..219bbeaf5 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp @@ -4,7 +4,6 @@ #include <utility> -#include "common/logging/log.h" #include "core/core.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/romfs_factory.h" diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h index f2ff080bb..c0db8c740 100644 --- a/src/core/loader/nca.h +++ b/src/core/loader/nca.h @@ -5,7 +5,6 @@ #pragma once #include "common/common_types.h" -#include "core/file_sys/vfs.h" #include "core/loader/loader.h" namespace Core { diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp index 8b167ad3c..f7ccc678d 100644 --- a/src/core/loader/nsp.cpp +++ b/src/core/loader/nsp.cpp @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "core/core.h" -#include "core/file_sys/card_image.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/nca_metadata.h" diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h index 50406a92e..378e4077a 100644 --- a/src/core/loader/nsp.h +++ b/src/core/loader/nsp.h @@ -6,7 +6,6 @@ #include <memory> #include "common/common_types.h" -#include "core/file_sys/vfs.h" #include "core/loader/loader.h" namespace FileSys { diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp index 269603eef..8c6c1a3fd 100644 --- a/src/core/loader/xci.cpp +++ b/src/core/loader/xci.cpp @@ -11,7 +11,6 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/registered_cache.h" -#include "core/file_sys/romfs.h" #include "core/file_sys/submission_package.h" #include "core/hle/kernel/k_process.h" #include "core/hle/service/filesystem/filesystem.h" diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h index 30caaf90e..6e3810e48 100644 --- a/src/core/loader/xci.h +++ b/src/core/loader/xci.h @@ -6,7 +6,6 @@ #include <memory> #include "common/common_types.h" -#include "core/file_sys/vfs.h" #include "core/loader/loader.h" namespace FileSys { diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index 46a7e09b4..20f0e90f5 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -8,8 +8,6 @@ #include "common/swap.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" -#include "core/hardware_properties.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" #include "core/hle/service/hid/controllers/npad.h" @@ -194,7 +192,7 @@ void CheatEngine::Initialize() { core_timing.ScheduleEvent(CHEAT_ENGINE_NS, event); metadata.process_id = system.CurrentProcess()->GetProcessID(); - metadata.title_id = system.CurrentProcess()->GetTitleID(); + metadata.title_id = system.GetCurrentProcessProgramID(); const auto& page_table = system.CurrentProcess()->PageTable(); metadata.heap_extents = { diff --git a/src/core/network/network.h b/src/core/network/network.h index fdd3e4655..e85df3ab7 100644 --- a/src/core/network/network.h +++ b/src/core/network/network.h @@ -6,7 +6,6 @@ #include <array> #include <optional> -#include <utility> #include "common/common_funcs.h" #include "common/common_types.h" diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index c9ded49d0..52c43c857 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -14,7 +14,6 @@ #include "common/fs/file.h" #include "common/fs/fs.h" #include "common/fs/path_util.h" -#include "common/math_util.h" #include "common/settings.h" #include "core/perf_stats.h" diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 365b8f906..d4becdc0a 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -236,7 +236,7 @@ void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 } const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); auto out = GetFullDataAuto(timestamp, title_id, system); auto break_out = json{ @@ -263,7 +263,7 @@ void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u } const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); auto out = GetFullDataAuto(timestamp, title_id, system); auto function_out = GetHLERequestContextData(ctx, system.Memory()); @@ -285,7 +285,7 @@ void Reporter::SaveUnimplementedAppletReport( } const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); auto out = GetFullDataAuto(timestamp, title_id, system); out["applet_common_args"] = { @@ -377,7 +377,7 @@ void Reporter::SaveUserReport() const { } const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); + const auto title_id = system.GetCurrentProcessProgramID(); SaveToFile(GetFullDataAuto(timestamp, title_id, system), GetPath("user_report", title_id, timestamp)); diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 191475f71..654db0b52 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend.GetValue())); - AddField(field_type, "Renderer_ResolutionFactor", - Settings::values.resolution_factor.GetValue()); AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue()); AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue()); AddField(field_type, "Renderer_UseDiskShaderCache", diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index 2e09faa6d..032c71aff 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -6,8 +6,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" -#include "core/hardware_properties.h" #include "core/memory.h" #include "core/tools/freezer.h" diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b5b7e5e83..bc3df80c8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h + ir_opt/rescaling_pass.cpp ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h index 35503000c..669702553 100644 --- a/src/shader_recompiler/backend/bindings.h +++ b/src/shader_recompiler/backend/bindings.h @@ -14,6 +14,8 @@ struct Bindings { u32 storage_buffer{}; u32 texture{}; u32 image{}; + u32 texture_scaling_index{}; + u32 image_scaling_index{}; }; } // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 069c019ad..8fd459dfe 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" @@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } if (!runtime_info.glasm_use_storage_buffers) { if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { - Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE}; + Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1); } } stage = program.stage; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4ce1c4f54..004658546 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); } + if (program.info.uses_rescaling_uniform) { + header += "PARAM scaling[1]={program.local[0..0]};"; + } header += "TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { header += fmt::format("R{},", index); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index bcb55f062..292655acb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -13,6 +13,8 @@ namespace Shader::Backend::GLASM { +constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1; + [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 09e3a9b82..d325d31c7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -608,6 +608,24 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); } +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + ctx.Add("AND.U RC.x,scaling[0].x,{};" + "SNE.S {},RC.x,0;", + 1u << index.U32(), ctx.reg_alloc.Define(inst)); +} + +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + ctx.Add("AND.U RC.x,scaling[0].y,{};" + "SNE.S {},RC.x,0;", + 1u << index.U32(), ctx.reg_alloc.Define(inst)); +} + void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value) { ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 12afda43b..1f343bff5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); @@ -303,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); @@ -553,6 +556,8 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register color); +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index f55c26b76..8aa494a4d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("MUL.S {}.x,{},{};", inst, a, b); } +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("DIV.S {}.x,{},{};", inst, a, b); +} + +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("DIV.U {}.x,{},{};", inst, a, b); +} + void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index ff64c6924..681aeda8d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -18,7 +18,7 @@ namespace Shader::Backend::GLASM { #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { - switch (phi.Arg(0).Type()) { + switch (phi.Type()) { case IR::Type::U1: case IR::Type::U32: case IR::Type::F32: @@ -210,6 +210,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); } +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.F {}.x,scaling[0].z;", inst); +} + void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.S {}.x,0;", inst); } diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 4e6f2c0fe..97bd59302 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineGenericOutput(index, program.invocations); } } + if (info.uses_rescaling_uniform) { + header += "layout(location=0) uniform vec4 scaling;"; + } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); SetupImages(bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 170db269a..4c26f3829 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -445,6 +445,10 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); } +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { + ctx.AddF32("{}=scaling.z;", inst); +} + void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { ctx.AddU32("{}=lmem[{}];", inst, word_offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 447eb8e0a..2f78d0267 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -612,6 +612,22 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value value); } +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); +} + +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index); +} + void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 5936d086f..f86502e4c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); @@ -362,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); @@ -627,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color); +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 38419f88f..88c1d4c5e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin ctx.AddU32("{}=uint({}*{});", inst, a, b); } +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b); +} + +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}/{};", inst, a, b); +} + void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=uint(-({}));", inst, value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 9b866f889..67f9dad68 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -68,7 +68,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& phi) { } if (!phi.Definition<Id>().is_valid) { // The phi node wasn't forward defined - ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); + ctx.var_alloc.PhiDefine(phi, phi.Type()); } } @@ -80,7 +80,7 @@ void EmitReference(EmitContext& ctx, const IR::Value& value) { void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { IR::Inst& phi{*phi_value.InstRecursive()}; - const auto phi_type{phi.Arg(0).Type()}; + const auto phi_type{phi.Type()}; if (!phi.Definition<Id>().is_valid) { // The phi node wasn't forward defined ctx.var_alloc.PhiDefine(phi, phi_type); diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2885e6799..723455462 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -7,11 +7,14 @@ #include <climits> #include <string_view> +#include <boost/container/static_vector.hpp> + #include <fmt/format.h> #include "common/common_types.h" #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { namespace { @@ -430,15 +433,33 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { } } -size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, - size_t start_offset) { +size_t FindAndSetNextUnusedLocation(std::bitset<IR::NUM_GENERICS>& used_locations, + size_t& start_offset) { for (size_t location = start_offset; location < used_locations.size(); ++location) { if (!used_locations.test(location)) { + start_offset = location; + used_locations.set(location); return location; } } throw RuntimeError("Unable to get an unused location for legacy attribute"); } + +Id DefineLegacyInput(EmitContext& ctx, std::bitset<IR::NUM_GENERICS>& used_locations, + size_t& start_offset) { + const Id id{DefineInput(ctx, ctx.F32[4], true)}; + const size_t location = FindAndSetNextUnusedLocation(used_locations, start_offset); + ctx.Decorate(id, spv::Decoration::Location, location); + return id; +} + +Id DefineLegacyOutput(EmitContext& ctx, std::bitset<IR::NUM_GENERICS>& used_locations, + size_t& start_offset, std::optional<u32> invocations) { + const Id id{DefineOutput(ctx, ctx.F32[4], invocations)}; + const size_t location = FindAndSetNextUnusedLocation(used_locations, start_offset); + ctx.Decorate(id, spv::Decoration::Location, location); + return id; +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -456,8 +477,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, - runtime_info{runtime_info_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, + stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, + image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; @@ -474,10 +496,11 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf DefineStorageBuffers(program.info, storage_binding); DefineTextureBuffers(program.info, texture_binding); DefineImageBuffers(program.info, image_binding); - DefineTextures(program.info, texture_binding); - DefineImages(program.info, image_binding); + DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); + DefineImages(program.info, image_binding, bindings.image_scaling_index); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); + DefineRescalingInput(program.info); } EmitContext::~EmitContext() = default; @@ -520,6 +543,64 @@ Id EmitContext::BitOffset16(const IR::Value& offset) { return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); } +Id EmitContext::InputLegacyAttribute(IR::Attribute attribute) { + if (attribute >= IR::Attribute::ColorFrontDiffuseR && + attribute <= IR::Attribute::ColorFrontDiffuseA) { + return input_front_color; + } + if (attribute >= IR::Attribute::ColorFrontSpecularR && + attribute <= IR::Attribute::ColorFrontSpecularA) { + return input_front_secondary_color; + } + if (attribute >= IR::Attribute::ColorBackDiffuseR && + attribute <= IR::Attribute::ColorBackDiffuseA) { + return input_back_color; + } + if (attribute >= IR::Attribute::ColorBackSpecularR && + attribute <= IR::Attribute::ColorBackSpecularA) { + return input_back_secondary_color; + } + if (attribute == IR::Attribute::FogCoordinate) { + return input_fog_frag_coord; + } + if (attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q) { + u32 index = + (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; + return input_fixed_fnc_textures[index]; + } + throw InvalidArgument("Attribute is not legacy attribute {}", attribute); +} + +Id EmitContext::OutputLegacyAttribute(IR::Attribute attribute) { + if (attribute >= IR::Attribute::ColorFrontDiffuseR && + attribute <= IR::Attribute::ColorFrontDiffuseA) { + return output_front_color; + } + if (attribute >= IR::Attribute::ColorFrontSpecularR && + attribute <= IR::Attribute::ColorFrontSpecularA) { + return output_front_secondary_color; + } + if (attribute >= IR::Attribute::ColorBackDiffuseR && + attribute <= IR::Attribute::ColorBackDiffuseA) { + return output_back_color; + } + if (attribute >= IR::Attribute::ColorBackSpecularR && + attribute <= IR::Attribute::ColorBackSpecularA) { + return output_back_secondary_color; + } + if (attribute == IR::Attribute::FogCoordinate) { + return output_fog_frag_coord; + } + if (attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q) { + u32 index = + (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; + return output_fixed_fnc_textures[index]; + } + throw InvalidArgument("Attribute is not legacy attribute {}", attribute); +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -920,6 +1001,73 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); } +void EmitContext::DefineRescalingInput(const Info& info) { + if (!info.uses_rescaling_uniform) { + return; + } + if (profile.unified_descriptor_binding) { + DefineRescalingInputPushConstant(); + } else { + DefineRescalingInputUniformConstant(); + } +} + +void EmitContext::DefineRescalingInputPushConstant() { + boost::container::static_vector<Id, 3> members{}; + u32 member_index{0}; + + rescaling_textures_type = TypeArray(U32[1], Const(4u)); + Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u); + members.push_back(rescaling_textures_type); + rescaling_textures_member_index = member_index++; + + rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS)); + Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u); + members.push_back(rescaling_images_type); + rescaling_images_member_index = member_index++; + + if (stage != Stage::Compute) { + members.push_back(F32[1]); + rescaling_downfactor_member_index = member_index++; + } + const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; + Decorate(push_constant_struct, spv::Decoration::Block); + Name(push_constant_struct, "ResolutionInfo"); + + MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset, + static_cast<u32>(offsetof(RescalingLayout, rescaling_textures))); + MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); + + MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset, + static_cast<u32>(offsetof(RescalingLayout, rescaling_images))); + MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images"); + + if (stage != Stage::Compute) { + MemberDecorate(push_constant_struct, rescaling_downfactor_member_index, + spv::Decoration::Offset, + static_cast<u32>(offsetof(RescalingLayout, down_factor))); + MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor"); + } + const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; + rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); + Name(rescaling_push_constants, "rescaling_push_constants"); + + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(rescaling_push_constants); + } +} + +void EmitContext::DefineRescalingInputUniformConstant() { + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, F32[4])}; + rescaling_uniform_constant = + AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant); + Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u); + + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(rescaling_uniform_constant); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; @@ -1108,7 +1256,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { } } -void EmitContext::DefineTextures(const Info& info, u32& binding) { +void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { const Id image_type{ImageType(*this, desc)}; @@ -1130,13 +1278,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { interfaces.push_back(id); } ++binding; + ++scaling_index; } if (info.uses_atomic_image_u32) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); } } -void EmitContext::DefineImages(const Info& info, u32& binding) { +void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { if (desc.count != 1) { @@ -1157,6 +1306,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { interfaces.push_back(id); } ++binding; + ++scaling_index; } } @@ -1279,22 +1429,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { } size_t previous_unused_location = 0; if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { - const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); - previous_unused_location = location; - used_locations.set(location); - const Id id{DefineInput(*this, F32[4], true)}; - Decorate(id, spv::Decoration::Location, location); - input_front_color = id; + input_front_color = DefineLegacyInput(*this, used_locations, previous_unused_location); + } + if (loads.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { + input_front_secondary_color = + DefineLegacyInput(*this, used_locations, previous_unused_location); + } + if (loads.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { + input_back_color = DefineLegacyInput(*this, used_locations, previous_unused_location); + } + if (loads.AnyComponent(IR::Attribute::ColorBackSpecularR)) { + input_back_secondary_color = + DefineLegacyInput(*this, used_locations, previous_unused_location); + } + if (loads.AnyComponent(IR::Attribute::FogCoordinate)) { + input_fog_frag_coord = DefineLegacyInput(*this, used_locations, previous_unused_location); } for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { - const size_t location = - FindNextUnusedLocation(used_locations, previous_unused_location); - previous_unused_location = location; - used_locations.set(location); - const Id id{DefineInput(*this, F32[4], true)}; - Decorate(id, spv::Decoration::Location, location); - input_fixed_fnc_textures[index] = id; + input_fixed_fnc_textures[index] = + DefineLegacyInput(*this, used_locations, previous_unused_location); } } if (stage == Stage::TessellationEval) { @@ -1356,22 +1510,29 @@ void EmitContext::DefineOutputs(const IR::Program& program) { } size_t previous_unused_location = 0; if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { - const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); - previous_unused_location = location; - used_locations.set(location); - const Id id{DefineOutput(*this, F32[4], invocations)}; - Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); - output_front_color = id; + output_front_color = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); + } + if (info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { + output_front_secondary_color = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); + } + if (info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { + output_back_color = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); + } + if (info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { + output_back_secondary_color = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); + } + if (info.stores.AnyComponent(IR::Attribute::FogCoordinate)) { + output_fog_frag_coord = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); } for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { - const size_t location = - FindNextUnusedLocation(used_locations, previous_unused_location); - previous_unused_location = location; - used_locations.set(location); - const Id id{DefineOutput(*this, F32[4], invocations)}; - Decorate(id, spv::Decoration::Location, location); - output_fixed_fnc_textures[index] = id; + output_fixed_fnc_textures[index] = + DefineLegacyOutput(*this, used_locations, previous_unused_location, invocations); } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 847d0c0e6..63f8185d9 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -113,6 +113,9 @@ public: [[nodiscard]] Id BitOffset8(const IR::Value& offset); [[nodiscard]] Id BitOffset16(const IR::Value& offset); + Id InputLegacyAttribute(IR::Attribute attribute); + Id OutputLegacyAttribute(IR::Attribute attribute); + Id Const(u32 value) { return Constant(U32[1], value); } @@ -235,6 +238,16 @@ public: Id indexed_load_func{}; Id indexed_store_func{}; + Id rescaling_uniform_constant{}; + Id rescaling_push_constants{}; + Id rescaling_textures_type{}; + Id rescaling_images_type{}; + u32 rescaling_textures_member_index{}; + u32 rescaling_images_member_index{}; + u32 rescaling_downfactor_member_index{}; + u32 texture_rescaling_index{}; + u32 image_rescaling_index{}; + Id local_memory{}; Id shared_memory_u8{}; @@ -269,12 +282,20 @@ public: Id input_position{}; Id input_front_color{}; + Id input_front_secondary_color{}; + Id input_back_color{}; + Id input_back_secondary_color{}; + Id input_fog_frag_coord{}; std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; Id output_front_color{}; + Id output_front_secondary_color{}; + Id output_back_color{}; + Id output_back_secondary_color{}; + Id output_fog_frag_coord{}; std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; @@ -299,10 +320,13 @@ private: void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); void DefineImageBuffers(const Info& info, u32& binding); - void DefineTextures(const Info& info, u32& binding); - void DefineImages(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding, u32& scaling_index); + void DefineImages(const Info& info, u32& binding, u32& scaling_index); void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); + void DefineRescalingInput(const Info& info); + void DefineRescalingInputPushConstant(); + void DefineRescalingInputUniformConstant(); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index db0c935fe..4b25534ce 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,6 +16,19 @@ namespace Shader::Backend::SPIRV { +constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4; +constexpr u32 NUM_IMAGE_SCALING_WORDS = 2; +constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = + NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS; + +struct RescalingLayout { + alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; + alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; + alignas(16) u32 down_factor; +}; +constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); +constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); + [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 6f60c6574..bac683ae1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,23 +43,12 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } -bool IsFixedFncTexture(IR::Attribute attribute) { - return attribute >= IR::Attribute::FixedFncTexture0S && - attribute <= IR::Attribute::FixedFncTexture9Q; -} - -u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { - if (!IsFixedFncTexture(attribute)) { - throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); - } - return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; -} - -u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { - if (!IsFixedFncTexture(attribute)) { - throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); - } - return static_cast<u32>(attribute) % 4u; +bool IsLegacyAttribute(IR::Attribute attribute) { + return (attribute >= IR::Attribute::ColorFrontDiffuseR && + attribute <= IR::Attribute::ColorBackSpecularA) || + attribute == IR::Attribute::FogCoordinate || + (attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q); } template <typename... Args> @@ -93,12 +82,16 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } - if (IsFixedFncTexture(attr)) { - const u32 index{FixedFncTextureAttributeIndex(attr)}; - const u32 element{FixedFncTextureAttributeElement(attr)}; - const Id element_id{ctx.Const(element)}; - return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], - element_id); + if (IsLegacyAttribute(attr)) { + if (attr == IR::Attribute::FogCoordinate) { + return OutputAccessChain(ctx, ctx.output_f32, ctx.OutputLegacyAttribute(attr), + ctx.Const(0u)); + } else { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.OutputLegacyAttribute(attr), + element_id); + } } switch (attr) { case IR::Attribute::PointSize: @@ -111,14 +104,6 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: { - const u32 element{static_cast<u32>(attr) % 4}; - const Id element_id{ctx.Const(element)}; - return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); - } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -341,11 +326,17 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } - if (IsFixedFncTexture(attr)) { - const u32 index{FixedFncTextureAttributeIndex(attr)}; - const Id attr_id{ctx.input_fixed_fnc_textures[index]}; - const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; - return ctx.OpLoad(ctx.F32[1], attr_ptr); + if (IsLegacyAttribute(attr)) { + if (attr == IR::Attribute::FogCoordinate) { + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, + ctx.InputLegacyAttribute(attr), ctx.Const(0u))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } else { + const Id element_id{ctx.Const(element)}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, + ctx.InputLegacyAttribute(attr), element_id)}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } } switch (attr) { case IR::Attribute::PrimitiveId: @@ -356,13 +347,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: { - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, - ctx.Const(element))); - } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -542,6 +526,18 @@ Id EmitYDirection(EmitContext& ctx) { return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); } +Id EmitResolutionDownFactor(EmitContext& ctx) { + if (ctx.profile.unified_descriptor_binding) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; + const Id index{ctx.Const(ctx.rescaling_downfactor_member_index)}; + const Id pointer{ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, index)}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else { + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + return ctx.OpCompositeExtract(ctx.F32[1], composite, 2u); + } +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1d5364309..4d168a96d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -224,6 +224,36 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx Decorate(ctx, inst, sample); return ctx.OpCompositeExtract(result_type, sample, 1U); } + +Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) { + const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])}; + Id bit{}; + if (index.IsImmediate()) { + // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL. + // LOP32I.NZ is used to set the predicate rather than BFE+ISETP. + const u32 index_value{index.U32() + base_index}; + const Id word_index{ctx.Const(index_value / 32)}; + const Id bit_index_mask{ctx.Const(1u << (index_value % 32))}; + const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, + member_index, word_index)}; + const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; + bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask); + } else { + Id index_value{ctx.Def(index)}; + if (base_index != 0) { + index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index)); + } + const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; + bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); + } + return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value); +} + +Id BitTest(EmitContext& ctx, Id mask, Id bit) { + const Id shifted{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)}; + const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; + return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); +} } // Anonymous namespace Id EmitBindlessImageSampleImplicitLod(EmitContext&) { @@ -470,4 +500,28 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id ctx.OpImageWrite(Image(ctx, index, info), coords, color); } +Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) { + if (ctx.profile.unified_descriptor_binding) { + const Id member_index{ctx.Const(ctx.rescaling_textures_member_index)}; + return IsScaled(ctx, index, member_index, ctx.texture_rescaling_index); + } else { + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 0u)}; + const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; + return BitTest(ctx, mask, ctx.Def(index)); + } +} + +Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) { + if (ctx.profile.unified_descriptor_binding) { + const Id member_index{ctx.Const(ctx.rescaling_images_member_index)}; + return IsScaled(ctx, index, member_index, ctx.image_rescaling_index); + } else { + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 1u)}; + const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; + return BitTest(ctx, mask, ctx.Def(index)); + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index c9db1c164..6cd22dd3e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx); Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitYDirection(EmitContext& ctx); +Id EmitResolutionDownFactor(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); @@ -283,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitSDiv32(EmitContext& ctx, Id a, Id b); +Id EmitUDiv32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); @@ -510,6 +513,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id derivates, Id offset, Id lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); +Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index); Id EmitBindlessImageAtomicIAdd32(EmitContext&); Id EmitBindlessImageAtomicSMin32(EmitContext&); Id EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 3501d7495..50277eec3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } +Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSDiv(ctx.U32[1], a, b); +} + +Id EmitUDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUDiv(ctx.U32[1], a, b); +} + Id EmitINeg32(EmitContext& ctx, Id value) { return ctx.OpSNegate(ctx.U32[1], value); } diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 7c08b25ce..974efa4a0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { PrependNewInst(end(), op, args); } +Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { + Inst* const inst{inst_pool->Create(base_inst)}; + return instructions.insert(insertion_point, *inst); +} + Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 7e134b4c7..fbfe98266 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -40,6 +40,9 @@ public: /// Appends a new instruction to the end of this basic block. void AppendNewInst(Opcode op, std::initializer_list<Value> args); + /// Prepends a copy of an instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); + /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args = {}, u32 flags = 0); @@ -152,6 +155,17 @@ public: return instructions.crend(); } + // Set the order of the block, it can be set pre order, the user decides + void SetOrder(u32 new_order) { + order = new_order; + } + + // Get the order of the block. + // The higher, the closer is the block to the end. + [[nodiscard]] u32 GetOrder() const { + return order; + } + private: /// Memory pool for instruction list ObjectPool<Inst>* inst_pool; @@ -171,6 +185,9 @@ private: /// Intrusively stored host definition of this block. u32 definition{}; + + /// Order of the block. + u32 order{}; }; using BlockList = std::vector<Block*>; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13159a68d..356f889ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ F32 IREmitter::YDirection() { return Inst<F32>(Opcode::YDirection); } +F32 IREmitter::ResolutionDownFactor() { + return Inst<F32>(Opcode::ResolutionDownFactor); +} + U32 IREmitter::LaneId() { return Inst<U32>(Opcode::LaneId); } @@ -1141,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst<U32>(Opcode::IMul32, a, b); } +U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { + return Inst<U32>(is_signed ? Opcode::SDiv32 : Opcode::UDiv32, a, b); +} + U32U64 IREmitter::INeg(const U32U64& value) { switch (value.Type()) { case Type::U32: @@ -1938,6 +1946,14 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(op, Flags{info}, handle, coords, value); } +U1 IREmitter::IsTextureScaled(const U32& index) { + return Inst<U1>(Opcode::IsTextureScaled, index); +} + +U1 IREmitter::IsImageScaled(const U32& index) { + return Inst<U1>(Opcode::IsImageScaled, index); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst<U1>(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1b89ca5a0..13eefa88b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -102,6 +102,8 @@ public: [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] F32 YDirection(); + [[nodiscard]] F32 ResolutionDownFactor(); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LoadGlobalU8(const U64& address); @@ -207,6 +209,7 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); @@ -356,6 +359,10 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + + [[nodiscard]] U1 IsTextureScaled(const U32& index); + [[nodiscard]] U1 IsImageScaled(const U32& index); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 3dfa5a880..97e2bf6af 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -6,6 +6,7 @@ #include <memory> #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/type.h" #include "shader_recompiler/frontend/ir/value.h" @@ -46,6 +47,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { } } +Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { + if (base.op == Opcode::Phi) { + throw NotImplementedException("Copying phi node"); + } + std::construct_at(&args); + const size_t num_args{base.NumArgs()}; + for (size_t index = 0; index < num_args; ++index) { + SetArg(index, base.Arg(index)); + } +} + Inst::~Inst() { if (op == Opcode::Phi) { std::destroy_at(&phi_args); @@ -253,6 +265,10 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } IR::Type Inst::Type() const { + if (op == IR::Opcode::Phi) { + // The type of a phi node is stored in its flags + return Flags<IR::Type>(); + } return TypeOf(op); } @@ -291,6 +307,16 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { phi_args.emplace_back(predecessor, value); } +void Inst::OrderPhiArgs() { + if (op != Opcode::Phi) { + throw LogicError("{} is not a Phi instruction", op); + } + std::sort(phi_args.begin(), phi_args.end(), + [](const std::pair<Block*, Value>& a, const std::pair<Block*, Value>& b) { + return a.first->GetOrder() < b.first->GetOrder(); + }); +} + void Inst::Invalidate() { ClearArgs(); ReplaceOpcode(Opcode::Void); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d91098c80..6929919df 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -62,6 +62,7 @@ OPCODE(InvocationId, U32, OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) OPCODE(YDirection, F32, ) +OPCODE(ResolutionDownFactor, F32, ) // Undefined OPCODE(UndefU1, U1, ) @@ -286,6 +287,8 @@ OPCODE(IAdd64, U64, U64, OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) +OPCODE(SDiv32, U32, U32, U32, ) +OPCODE(UDiv32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) @@ -490,6 +493,9 @@ OPCODE(ImageGradient, F32x4, Opaq OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(IsTextureScaled, U1, U32, ) +OPCODE(IsImageScaled, U1, U32, ) + // Atomic Image operations OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 334bb47aa..947579852 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -116,10 +116,10 @@ public: class Inst : public boost::intrusive::list_base_hook<> { public: explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + explicit Inst(const Inst& base); ~Inst(); Inst& operator=(const Inst&) = delete; - Inst(const Inst&) = delete; Inst& operator=(Inst&&) = delete; Inst(Inst&&) = delete; @@ -182,6 +182,9 @@ public: /// Add phi operand to a phi instruction. void AddPhiOperand(Block* predecessor, const Value& value); + /// Orders the Phi arguments from farthest away to nearest. + void OrderPhiArgs(); + void Invalidate(); void ClearArgs(); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 012d55357..267ebe4af 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -27,9 +27,11 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::BlockList blocks; blocks.reserve(num_syntax_blocks); + u32 order_index{}; for (const auto& node : syntax_list) { if (node.type == IR::AbstractSyntaxNode::Type::Block) { blocks.push_back(node.data.block); + blocks.back()->SetOrder(order_index++); } } return blocks; @@ -177,6 +179,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::TexturePass(env, program); Optimization::ConstantPropagationPass(program); + + if (Settings::values.resolution_info.active) { + Optimization::RescalingPass(program); + } Optimization::DeadCodeEliminationPass(program); if (Settings::values.renderer_debug) { Optimization::VerificationPass(program); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f69e1c9cc..1e476d83d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -430,6 +430,11 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; + case IR::Opcode::ResolutionDownFactor: + case IR::Opcode::IsTextureScaled: + case IR::Opcode::IsImageScaled: + info.uses_rescaling_uniform = true; + break; case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 2f89b1ea0..f877c7ba0 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); +void RescalingPass(IR::Program& program); void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Program& program); diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp new file mode 100644 index 000000000..c28500dd1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -0,0 +1,327 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "common/settings.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) { + switch (type) { + case TextureType::Color2D: + case TextureType::ColorArray2D: + return true; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + break; + } + return false; +} + +void VisitMark(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::ShuffleIndex: + case IR::Opcode::ShuffleUp: + case IR::Opcode::ShuffleDown: + case IR::Opcode::ShuffleButterfly: { + const IR::Value shfl_arg{inst.Arg(0)}; + if (shfl_arg.IsImmediate()) { + break; + } + const IR::Inst* const arg_inst{shfl_arg.InstRecursive()}; + if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) { + break; + } + const IR::Value bitcast_arg{arg_inst->Arg(0)}; + if (bitcast_arg.IsImmediate()) { + break; + } + IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()}; + bool must_patch_outside = false; + if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) { + const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + bitcast_inst->SetFlags<u32>(0xDEADBEEF); + must_patch_outside = true; + break; + default: + break; + } + } + if (must_patch_outside) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 new_inst{&*block.PrependNewInst(it, inst)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::Value converted{ir.FPMul(new_inst, up_factor)}; + inst.ReplaceUsesWith(converted); + } + break; + } + + default: + break; + } +} + +void PatchFragCoord(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 down_factor{ir.ResolutionDownFactor()}; + const IR::F32 frag_coord{ir.GetAttribute(inst.Arg(0).Attribute())}; + const IR::F32 downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)}; + inst.ReplaceUsesWith(downscaled_frag_coord); +} + +void PatchPointSize(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 point_value{inst.Arg(1)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; + inst.SetArg(1, upscaled_point_value); +} + +[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { + IR::U32 scaled_value{value}; + if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { + scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale)); + } + if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { + scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift)); + } + return IR::U32{ir.Select(is_scaled, scaled_value, value)}; +} + +[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value, + const IR::Attribute attrib) { + const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)}; + const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), up_factor)}; + const IR::F32 frag_coord{ir.GetAttribute(attrib)}; + const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)}; + const IR::F32 floor{ir.FPMul(up_factor, ir.FPFloor(ir.FPMul(frag_coord, down_factor)))}; + const IR::F16F32F64 deviation{ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor)))}; + return IR::U32{ir.Select(is_scaled, ir.ConvertFToU(32, deviation), value)}; +} + +[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { + IR::U32 scaled_value{value}; + if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { + scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift)); + } + if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { + scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale)); + } + return IR::U32{ir.Select(is_scaled, scaled_value, value)}; +} + +void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + switch (info.type) { + case TextureType::Color2D: + case TextureType::ColorArray2D: { + const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; + const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 0)})}; + const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})}; + const IR::Value replacement{ir.CompositeConstruct( + width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))}; + inst.ReplaceUsesWith(replacement); + break; + } + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + +void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::ColorArray2D: { + const IR::U32 z{ir.CompositeExtract(composite, 2)}; + inst.SetArg(index, ir.CompositeConstruct(x, y, z)); + break; + } + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + +void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::Value coord{inst.Arg(1)}; + const IR::U32 coord_x{ir.CompositeExtract(coord, 0)}; + const IR::U32 coord_y{ir.CompositeExtract(coord, 1)}; + + const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)}; + const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)}; + switch (info.type) { + case TextureType::Color2D: + inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y)); + break; + case TextureType::ColorArray2D: { + const IR::U32 z{ir.CompositeExtract(coord, 2)}; + inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z)); + break; + } + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + +void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + SubScaleCoord(ir, inst, is_scaled); + // Scale ImageFetch offset + ScaleIntegerComposite(ir, inst, is_scaled, 2); +} + +void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } + const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; + SubScaleCoord(ir, inst, is_scaled); +} + +void PatchImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + ScaleIntegerComposite(ir, inst, is_scaled, 1); + // Scale ImageFetch offset + ScaleIntegerComposite(ir, inst, is_scaled, 2); +} + +void PatchImageRead(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } + const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; + ScaleIntegerComposite(ir, inst, is_scaled, 1); +} + +void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { + const bool is_fragment_shader{program.stage == Stage::Fragment}; + switch (inst.GetOpcode()) { + case IR::Opcode::GetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + if (is_fragment_shader && inst.Flags<u32>() != 0xDEADBEEF) { + PatchFragCoord(block, inst); + } + break; + default: + break; + } + break; + } + case IR::Opcode::SetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PointSize: + if (inst.Flags<u32>() != 0xDEADBEEF) { + PatchPointSize(block, inst); + } + break; + default: + break; + } + break; + } + case IR::Opcode::ImageQueryDimensions: + PatchImageQueryDimensions(block, inst); + break; + case IR::Opcode::ImageFetch: + if (is_fragment_shader) { + SubScaleImageFetch(block, inst); + } else { + PatchImageFetch(block, inst); + } + break; + case IR::Opcode::ImageRead: + if (is_fragment_shader) { + SubScaleImageRead(block, inst); + } else { + PatchImageRead(block, inst); + } + break; + default: + break; + } +} +} // Anonymous namespace + +void RescalingPass(IR::Program& program) { + const bool is_fragment_shader{program.stage == Stage::Fragment}; + if (is_fragment_shader) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + VisitMark(*block, inst); + } + } + } + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(program, *block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 53145fb5e..87aa09358 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,6 +14,7 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // +#include <deque> #include <span> #include <variant> #include <vector> @@ -370,6 +371,26 @@ void VisitBlock(Pass& pass, IR::Block* block) { } pass.SealBlock(block); } + +IR::Type GetConcreteType(IR::Inst* inst) { + std::deque<IR::Inst*> queue; + queue.push_back(inst); + while (!queue.empty()) { + IR::Inst* current = queue.front(); + queue.pop_front(); + const size_t num_args{current->NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const auto set_type = current->Arg(i).Type(); + if (set_type != IR::Type::Opaque) { + return set_type; + } + if (!current->Arg(i).IsImmediate()) { + queue.push_back(current->Arg(i).Inst()); + } + } + } + return IR::Type::Opaque; +} } // Anonymous namespace void SsaRewritePass(IR::Program& program) { @@ -378,6 +399,16 @@ void SsaRewritePass(IR::Program& program) { for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { VisitBlock(pass, *block); } + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + for (IR::Inst& inst : (*block)->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + if (inst.Type() == IR::Type::Opaque) { + inst.SetFlags(GetConcreteType(&inst)); + } + inst.OrderPhiArgs(); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4ef4dbd40..9f375c30e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -172,6 +172,7 @@ struct Info { bool uses_global_memory{}; bool uses_atomic_image_u32{}; bool uses_shadow_lod{}; + bool uses_rescaling_uniform{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; @@ -190,4 +191,13 @@ struct Info { ImageDescriptors image_descriptors; }; +template <typename Descriptors> +u32 NumDescriptors(const Descriptors& descriptors) { + u32 num{}; + for (const auto& desc : descriptors) { + num += desc.count; + } + return num; +} + } // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 269db21a5..91a30fef7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -15,6 +15,8 @@ add_library(video_core STATIC command_classes/codecs/codec.h command_classes/codecs/h264.cpp command_classes/codecs/h264.h + command_classes/codecs/vp8.cpp + command_classes/codecs/vp8.h command_classes/codecs/vp9.cpp command_classes/codecs/vp9.h command_classes/codecs/vp9_types.h @@ -130,6 +132,8 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_fsr.cpp + renderer_vulkan/vk_fsr.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d350c9b36..43bed63ac 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -853,12 +853,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { } if constexpr (USE_MEMORY_MAPS) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + runtime.PreCopyBarrier(); for (auto& [copy, buffer_id] : downloads) { // Have in mind the staging buffer offset for the copy copy.dst_offset += download_staging.offset; const std::array copies{copy}; - runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); + runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); } + runtime.PostCopyBarrier(); runtime.Finish(); for (const auto& [copy, buffer_id] : downloads) { const Buffer& buffer = slot_buffers[buffer_id]; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 61966cbfe..916277811 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -8,6 +8,7 @@ #include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" +#include "video_core/command_classes/codecs/vp8.h" #include "video_core/command_classes/codecs/vp9.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -46,6 +47,7 @@ void AVFrameDeleter(AVFrame* ptr) { Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), + vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -135,7 +137,9 @@ void Codec::Initialize() { switch (current_codec) { case NvdecCommon::VideoCodec::H264: return AV_CODEC_ID_H264; - case NvdecCommon::VideoCodec::Vp9: + case NvdecCommon::VideoCodec::VP8: + return AV_CODEC_ID_VP8; + case NvdecCommon::VideoCodec::VP9: return AV_CODEC_ID_VP9; default: UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); @@ -176,19 +180,27 @@ void Codec::Decode() { return; } bool vp9_hidden_frame = false; - std::vector<u8> frame_data; - if (current_codec == NvdecCommon::VideoCodec::H264) { - frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - frame_data = vp9_decoder->ComposeFrameHeader(state); - vp9_hidden_frame = vp9_decoder->WasFrameHidden(); - } + const auto& frame_data = [&]() { + switch (current_codec) { + case Tegra::NvdecCommon::VideoCodec::H264: + return h264_decoder->ComposeFrame(state, is_first_frame); + case Tegra::NvdecCommon::VideoCodec::VP8: + return vp8_decoder->ComposeFrame(state); + case Tegra::NvdecCommon::VideoCodec::VP9: + vp9_decoder->ComposeFrame(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + return vp9_decoder->GetFrameBytes(); + default: + UNREACHABLE(); + return std::vector<u8>{}; + } + }(); AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; if (!packet) { LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); return; } - packet->data = frame_data.data(); + packet->data = const_cast<u8*>(frame_data.data()); packet->size = static_cast<s32>(frame_data.size()); if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); @@ -252,11 +264,11 @@ std::string_view Codec::GetCurrentCodecName() const { return "None"; case NvdecCommon::VideoCodec::H264: return "H264"; - case NvdecCommon::VideoCodec::Vp8: + case NvdecCommon::VideoCodec::VP8: return "VP8"; case NvdecCommon::VideoCodec::H265: return "H265"; - case NvdecCommon::VideoCodec::Vp9: + case NvdecCommon::VideoCodec::VP9: return "VP9"; default: return "Unknown"; diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index f9a80886f..13ed88382 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -29,6 +29,7 @@ using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>; namespace Decoder { class H264; +class VP8; class VP9; } // namespace Decoder @@ -72,6 +73,7 @@ private: GPU& gpu; const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; + std::unique_ptr<Decoder::VP8> vp8_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; std::queue<AVFramePtr> av_frames{}; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 5519c4705..84f1fa938 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,8 +45,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; -const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, - bool is_first_frame) { +const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state, + bool is_first_frame) { H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index bfe84a472..1899d8e7f 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -75,9 +75,9 @@ public: explicit H264(GPU& gpu); ~H264(); - /// Compose the H264 header of the frame for FFmpeg decoding - [[nodiscard]] const std::vector<u8>& ComposeFrameHeader( - const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); + /// Compose the H264 frame for FFmpeg decoding + [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); private: std::vector<u8> frame; diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/command_classes/codecs/vp8.cpp new file mode 100644 index 000000000..32ad0ec16 --- /dev/null +++ b/src/video_core/command_classes/codecs/vp8.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <vector> + +#include "video_core/command_classes/codecs/vp8.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Decoder { +VP8::VP8(GPU& gpu_) : gpu(gpu_) {} + +VP8::~VP8() = default; + +const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { + VP8PictureInfo info; + gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); + + const bool is_key_frame = info.key_frame == 1u; + const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); + const size_t header_size = is_key_frame ? 10u : 3u; + frame.resize(header_size + bitstream_size); + + // Based on page 30 of the VP8 specification. + // https://datatracker.ietf.org/doc/rfc6386/ + frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes). + frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number + frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag + + // The next 19-bits are the first partition size + frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u); + frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u); + frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u); + + if (is_key_frame) { + frame[3] = 0x9du; + frame[4] = 0x01u; + frame[5] = 0x2au; + // TODO(ameerj): Horizontal/Vertical Scale + // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits) + frame[6] = static_cast<u8>(info.frame_width & 0xff); + frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f)); + // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits) + frame[8] = static_cast<u8>(info.frame_height & 0xff); + frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); + } + const u64 bitstream_offset = state.frame_bitstream_offset; + gpu.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); + + return frame; +} + +} // namespace Tegra::Decoder diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/command_classes/codecs/vp8.h new file mode 100644 index 000000000..41fc7b403 --- /dev/null +++ b/src/video_core/command_classes/codecs/vp8.h @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; +namespace Decoder { + +class VP8 { +public: + explicit VP8(GPU& gpu); + ~VP8(); + + /// Compose the VP8 frame for FFmpeg decoding + [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state); + +private: + std::vector<u8> frame; + GPU& gpu; + + struct VP8PictureInfo { + INSERT_PADDING_WORDS_NOINIT(14); + u16 frame_width; // actual frame width + u16 frame_height; // actual frame height + u8 key_frame; + u8 version; + union { + u8 raw; + BitField<0, 2, u8> tile_format; + BitField<2, 3, u8> gob_height; + BitField<5, 3, u8> reserverd_surface_format; + }; + u8 error_conceal_on; // 1: error conceal on; 0: off + u32 first_part_size; // the size of first partition(frame header and mb header partition) + u32 hist_buffer_size; // in units of 256 + u32 vld_buffer_size; // in units of 1 + // Current frame buffers + std::array<u32, 2> frame_stride; // [y_c] + u32 luma_top_offset; // offset of luma top field in units of 256 + u32 luma_bot_offset; // offset of luma bottom field in units of 256 + u32 luma_frame_offset; // offset of luma frame in units of 256 + u32 chroma_top_offset; // offset of chroma top field in units of 256 + u32 chroma_bot_offset; // offset of chroma bottom field in units of 256 + u32 chroma_frame_offset; // offset of chroma frame in units of 256 + + INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams + + // Decode picture buffer related + s8 current_output_memory_layout; + // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last + std::array<s8, 3> output_memory_layout; + + u8 segmentation_feature_data_update; + INSERT_PADDING_BYTES_NOINIT(3); + + // ucode return result + u32 result_value; + std::array<u32, 8> partition_offset; + INSERT_PADDING_WORDS_NOINIT(3); + }; + static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size"); +}; + +} // namespace Decoder +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index d7e749485..2c00181fa 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -770,7 +770,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { return uncomp_writer; } -const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) { +void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { std::vector<u8> bitstream; { Vp9FrameContainer curr_frame = GetCurrentFrame(state); @@ -792,7 +792,6 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters frame.begin() + uncompressed_header.size()); std::copy(bitstream.begin(), bitstream.end(), frame.begin() + uncompressed_header.size() + compressed_header.size()); - return frame; } VpxRangeEncoder::VpxRangeEncoder() { diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index e6e9fc17e..2e735c792 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h @@ -116,16 +116,20 @@ public: VP9(VP9&&) = default; VP9& operator=(VP9&&) = delete; - /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec - /// documentation - [[nodiscard]] const std::vector<u8>& ComposeFrameHeader( - const NvdecCommon::NvdecRegisters& state); + /// Composes the VP9 frame from the GPU state information. + /// Based on the official VP9 spec documentation + void ComposeFrame(const NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. [[nodiscard]] bool WasFrameHidden() const { return !current_frame_info.show_frame; } + /// Returns a const reference to the composed frame data. + [[nodiscard]] const std::vector<u8>& GetFrameBytes() const { + return frame; + } + private: /// Generates compressed header probability updates in the bitstream writer template <typename T, std::size_t N> diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index b5c55f14a..9aaf5247e 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -35,7 +35,8 @@ AVFramePtr Nvdec::GetFrame() { void Nvdec::Execute() { switch (codec->GetCurrentCodec()) { case NvdecCommon::VideoCodec::H264: - case NvdecCommon::VideoCodec::Vp9: + case NvdecCommon::VideoCodec::VP8: + case NvdecCommon::VideoCodec::VP9: codec->Decode(); break; default: diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 6a24e00a0..8a35c44a1 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -13,9 +13,9 @@ namespace Tegra::NvdecCommon { enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, - Vp8 = 0x5, + VP8 = 0x5, H265 = 0x7, - Vp9 = 0x9, + VP9 = 0x9, }; // NVDEC should use a 32-bit address space, but is mapped to 64-bit, @@ -50,7 +50,10 @@ struct NvdecRegisters { u64 h264_last_surface_chroma_offset; ///< 0x0858 std::array<u64, 17> surface_luma_offset; ///< 0x0860 std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 - INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970 + u64 vp8_prob_data_offset; ///< 0x0A80 + u64 vp8_header_partition_buf_offset; ///< 0x0A88 + INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90 u64 vp9_entropy_probs_offset; ///< 0x0B80 u64 vp9_backward_updates_offset; ///< 0x0B88 u64 vp9_last_frame_segmap_offset; ///< 0x0B90 @@ -81,6 +84,8 @@ ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); ASSERT_REG_POSITION(surface_luma_offset, 0x10C); ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150); +ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151); ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index f0d545f90..d63ad5a35 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -29,6 +29,8 @@ enum : u8 { ColorBuffer6, ColorBuffer7, ZetaBuffer, + RescaleViewports, + RescaleScissors, VertexBuffers, VertexBuffer0, diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 05e5c94f3..c89a5d693 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -83,6 +83,7 @@ enum class DepthFormat : u32 { S8_UINT_Z24_UNORM = 0x14, D24X8_UNORM = 0x15, D24S8_UNORM = 0x16, + S8_UINT = 0x17, D24C8_UNORM = 0x18, D32_FLOAT_S8X24_UINT = 0x19, }; diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 20d748c12..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,3 +1,11 @@ +set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) + +set(GLSL_INCLUDES + fidelityfx_fsr.comp + ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h + ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h +) + set(SHADER_FILES astc_decoder.comp block_linear_unswizzle_2d.comp @@ -5,14 +13,25 @@ set(SHADER_FILES convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert + fxaa.frag + fxaa.vert opengl_copy_bc4.comp opengl_present.frag opengl_present.vert + opengl_present_scaleforce.frag pitch_unswizzle.comp + present_bicubic.frag + present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_fidelityfx_fsr_easu_fp16.comp + vulkan_fidelityfx_fsr_easu_fp32.comp + vulkan_fidelityfx_fsr_rcas_fp16.comp + vulkan_fidelityfx_fsr_rcas_fp32.comp vulkan_present.frag vulkan_present.vert + vulkan_present_scaleforce_fp16.frag + vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp vulkan_uint8.comp ) @@ -76,7 +95,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) OUTPUT ${SPIRV_HEADER_FILE} COMMAND - ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} MAIN_DEPENDENCY ${SOURCE_FILE} ) @@ -84,9 +103,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) endif() endforeach() +set(SHADER_SOURCES ${SHADER_FILES}) +list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) + add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_FILES} + ${SHADER_SOURCES} ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp new file mode 100644 index 000000000..6b97f789d --- /dev/null +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +#extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_shader_explicit_arithmetic_types : require + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +layout( push_constant ) uniform constants { + uvec4 Const0; + uvec4 Const1; + uvec4 Const2; + uvec4 Const3; +}; + +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; + +#define A_GPU 1 +#define A_GLSL 1 + +#ifndef YUZU_USE_FP16 + #include "ffx_a.h" + + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F 1 + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + #define A_HALF + #include "ffx_a.h" + + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H 1 + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif +#endif + +#include "ffx_fsr1.h" + +void CurrFilter(AU2 pos) { +#if USE_BILINEAR + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); +#endif +#if USE_EASU + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif +#endif +#if USE_RCAS + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif +#endif +} + +layout(local_size_x=64) in; +void main() { + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + CurrFilter(gxy); + gxy.x += 8u; + CurrFilter(gxy); + gxy.y += 8u; + CurrFilter(gxy); + gxy.x -= 8u; + CurrFilter(gxy); +} diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag new file mode 100644 index 000000000..02f4068d1 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.frag @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Source code is adapted from +// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ + +#version 460 + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec4 posPos; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SPAN_MAX = 8.0; +const float FXAA_REDUCE_MUL = 1.0 / 8.0; +const float FXAA_REDUCE_MIN = 1.0 / 128.0; + +#define FxaaTexLod0(t, p) textureLod(t, p, 0.0) +#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o) + +vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) { + + vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz; + vec3 rgbNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz; + vec3 rgbSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz; + vec3 rgbSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz; + vec3 rgbM = FxaaTexLod0(tex, posPos.xy).xyz; +/*---------------------------------------------------------*/ + vec3 luma = vec3(0.299, 0.587, 0.114); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot(rgbM, luma); +/*---------------------------------------------------------*/ + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); +/*---------------------------------------------------------*/ + vec2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); +/*---------------------------------------------------------*/ + float dirReduce = max( + (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL), + FXAA_REDUCE_MIN); + float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce); + dir = min(vec2( FXAA_SPAN_MAX, FXAA_SPAN_MAX), + max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), + dir * rcpDirMin)) / textureSize(tex, 0); +/*--------------------------------------------------------*/ + vec3 rgbA = (1.0 / 2.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz); + vec3 rgbB = rgbA * (1.0 / 2.0) + (1.0 / 4.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz); + float lumaB = dot(rgbB, luma); + if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA; + return rgbB; +} + +void main() { + frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); +} diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert new file mode 100644 index 000000000..ac20c04e9 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.vert @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 + +out gl_PerVertex { + vec4 gl_Position; +}; + +const vec2 vertices[4] = + vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); + +layout (location = 0) out vec4 posPos; + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexIndex + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexID + +#endif + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SUBPIX_SHIFT = 0; + +void main() { + vec2 vertex = vertices[VERTEX_ID]; + gl_Position = vec4(vertex, 0.0, 1.0); + vec2 vert_tex_coord = (vertex + 1.0) / 2.0; + posPos.xy = vert_tex_coord; + posPos.zw = vert_tex_coord - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0); +} diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..71ff9e1e3 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -0,0 +1,130 @@ +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce + +//! #version 460 + +#extension GL_ARB_separate_shader_objects : enable + +#ifdef YUZU_USE_FP16 + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +#define lfloat float16_t +#define lvec2 f16vec2 +#define lvec3 f16vec3 +#define lvec4 f16vec4 + +#else + +#define lfloat float +#define lvec2 vec2 +#define lvec3 vec3 +#define lvec4 vec4 + +#endif + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const bool ignore_alpha = true; + +lfloat ColorDist1(lvec4 a, lvec4 b) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const lvec3 K = lvec3(0.2627, 0.6780, 0.0593); + const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b); + const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r); + lvec4 diff = a - b; + lfloat Y = dot(diff.rgb, K); + lfloat Cb = scaleB * (diff.b - Y); + lfloat Cr = scaleR * (diff.r - Y); + lvec3 YCbCr = lvec3(Y, Cb, Cr); + lfloat d = length(YCbCr); + if (ignore_alpha) { + return d; + } + return sqrt(a.a * b.a * d * d + diff.a * diff.a); +} + +lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { + return lvec4( + ColorDist1(ref, A), + ColorDist1(ref, B), + ColorDist1(ref, C), + ColorDist1(ref, D) + ); +} + +vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { + lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + lvec4 cc = lvec4(texture(tex, tex_coord)); + lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1))); + lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); + + lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + const lfloat plus_weight = lfloat(1.5); + const lfloat cross_weight = lfloat(1.5); + lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); + + if (total_dist == lfloat(0.0)) { + return cc; + } else { + // Add together all the distances with direction taken into account + lvec4 tmp = offset_tl - offset_br; + lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight; + + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + lfloat clamp_val = length(total_offset) / total_dist; + vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0); + + return texture(tex, tex_coord - final_offset); + } +} + +void main() { + frag_color = Scaleforce(input_texture, tex_coord); +} diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag new file mode 100644 index 000000000..902b70c2b --- /dev/null +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag new file mode 100644 index 000000000..66fed3238 --- /dev/null +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -0,0 +1,70 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Code adapted from the following sources: +// - https://learnopengl.com/Advanced-Lighting/Bloom +// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ + +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout(location = 0) in vec2 frag_tex_coord; + +layout(location = 0) out vec4 color; + +layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); +const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); + +vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) * weight[i]; + } + return result; +} + +vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) * weight[i]; + } + return result; +} + +vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i = 1; i < 3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) * weight[i]; + } + return result; +} + +void main() { + vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; + vec2 tex_offset = 1.0f / textureSize(color_texture, 0); + + // TODO(Blinkhawk): This code can be optimized through shader group instructions. + vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; + vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); + color = vec4(combination + base, 1.0f); +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp new file mode 100644 index 000000000..1c96a7905 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp new file mode 100644 index 000000000..6b6796dd1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -0,0 +1,7 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -0,0 +1,5 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 187a28e4d..d4dd10bb6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -5,6 +5,7 @@ #include <algorithm> #include <span> +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" @@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff .padding = 0, }; buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, - reinterpret_cast<const GLuint*>(&ssbo)); + glProgramLocalParametersI4uivNV( + PROGRAM_LUT[stage], + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); } } @@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf .padding = 0, }; buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, - reinterpret_cast<const GLuint*>(&ssbo)); + glProgramLocalParametersI4uivNV( + GL_COMPUTE_PROGRAM_NV, + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); } } diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index aa1cc592f..5c1f21c65 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -19,15 +19,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; -template <typename Range> -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - size_t ComputePipelineKey::Hash() const noexcept { return static_cast<size_t>( Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); @@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); - num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); + num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); - const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; + const u32 num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; + const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); const bool is_glasm{assembly_program.handle != 0}; - const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)}; use_storage_buffers = !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); writes_global_memory = !use_storage_buffers && @@ -88,8 +79,7 @@ void ComputePipeline::Configure() { } texture_cache.SynchronizeComputeDescriptors(); - std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; - boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; std::array<GLuint, MAX_TEXTURES> samplers; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; @@ -119,33 +109,39 @@ void ComputePipeline::Configure() { } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({ + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }); } }}; for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); samplers[sampler_binding++] = 0; } } - std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc, false); + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers[sampler_binding++] = sampler->Handle(); } } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); + for (const auto& desc : info.image_descriptors) { + add_image(desc, desc.is_written); + } + texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); @@ -161,7 +157,7 @@ void ComputePipeline::Configure() { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + ImageView& image_view{texture_cache.GetImageView(views[texbuf_index].id)}; buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); @@ -177,23 +173,45 @@ void ComputePipeline::Configure() { buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + + num_image_buffers}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; + u32 texture_scaling_mask{}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; + textures[texture_binding] = image_view.Handle(desc.type); + if (texture_cache.IsRescaling(image_view)) { + texture_scaling_mask |= 1u << texture_binding; + } + ++texture_binding; } } + u32 image_scaling_mask{}; for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << image_binding; + } + ++image_binding; + } + } + if (info.uses_rescaling_uniform) { + const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; + if (assembly_program.handle != 0) { + glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); + } else { + glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index bccb37a58..f8495896c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache_base.h" +#include "video_core/texture_cache/texture_cache.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] @@ -27,6 +27,7 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; using Shader::ImageDescriptor; +using Shader::NumDescriptors; using Shader::TextureBufferDescriptor; using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; @@ -35,15 +36,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; -template <typename Range> -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline( base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + base_uniform_bindings[stage + 1] += NumDescriptors(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors); } enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)}; num_image_buffers[stage] += num_img_buffers_bindings; num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += NumDescriptors(info.texture_descriptors); + num_images += NumDescriptors(info.image_descriptors); + num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors); writes_global_memory |= std::ranges::any_of( info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); @@ -288,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline( template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; - std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; std::array<GLuint, MAX_TEXTURES> samplers; - size_t image_view_index{}; + size_t views_index{}; GLsizei sampler_binding{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -336,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = { + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }; } }}; if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; samplers[sampler_binding++] = 0; } } } if constexpr (Spec::has_image_buffers) { for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_binding++] = sampler->Handle(); @@ -367,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc); + add_image(desc, desc.is_written); } } }}; @@ -386,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index)); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - ImageId* texture_buffer_index{image_view_ids.data()}; + VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { @@ -402,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++index; - ++texture_buffer_index; + ++texture_buffer_it; } }}; const Shader::Info& info{stage_infos[stage]}; @@ -423,13 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -453,12 +443,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (!is_built.load(std::memory_order::relaxed)) { WaitForBuild(); } - if (assembly_programs[0].handle != 0) { + const bool use_assembly{assembly_programs[0].handle != 0}; + if (use_assembly) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindSourcePrograms(source_programs); } - const ImageId* views_it{image_view_ids.data()}; + const VideoCommon::ImageViewInOut* views_it{views.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; std::array<GLuint, MAX_TEXTURES> textures; @@ -473,20 +464,49 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { views_it += num_texture_buffers[stage]; views_it += num_image_buffers[stage]; + u32 texture_scaling_mask{}; + u32 image_scaling_mask{}; + u32 stage_texture_binding{}; + u32 stage_image_binding{}; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; + textures[texture_binding] = image_view.Handle(desc.type); + if (texture_cache.IsRescaling(image_view)) { + texture_scaling_mask |= 1u << stage_texture_binding; + } + ++texture_binding; + ++stage_texture_binding; } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << stage_image_binding; + } + ++image_binding; + ++stage_image_binding; + } + } + if (info.uses_rescaling_uniform) { + const f32 float_texture_scaling_mask{Common::BitCast<f32>(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast<f32>(image_scaling_mask)}; + const bool is_rescaling{texture_cache.IsRescaling()}; + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; + if (use_assembly) { + glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); + } else { + glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); } } }}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b909c387e..9b516c64f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -10,16 +10,14 @@ #include <string_view> #include <tuple> #include <utility> + #include <glad/glad.h> -#include "common/alignment.h" + #include "common/assert.h" #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" -#include "common/scope_exit.h" #include "common/settings.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" @@ -186,6 +184,10 @@ void RasterizerOpenGL::Clear() { SyncRasterizeEnable(); SyncStencilTestState(); + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + SyncViewport(); if (regs.clear_flags.scissor) { SyncScissorTest(); } else { @@ -194,10 +196,6 @@ void RasterizerOpenGL::Clear() { } UNIMPLEMENTED_IF(regs.clear_flags.viewport); - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UpdateRenderTargets(true); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } @@ -216,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - SyncState(); - GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; if (!pipeline) { return; @@ -225,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); + SyncState(); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(pipeline, primitive_mode); @@ -535,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; - const bool dirty_viewport = flags[Dirty::Viewports]; + const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports]; + const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports; const bool dirty_clip_control = flags[Dirty::ClipControl]; if (dirty_clip_control || flags[Dirty::FrontFace]) { @@ -555,8 +554,7 @@ void RasterizerOpenGL::SyncViewport() { } glFrontFace(mode); } - - if (dirty_viewport || flags[Dirty::ClipControl]) { + if (dirty_viewport || dirty_clip_control) { flags[Dirty::ClipControl] = false; bool flip_y = false; @@ -572,37 +570,58 @@ void RasterizerOpenGL::SyncViewport() { state_tracker.ClipControl(origin, depth); state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); } + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + const auto conv = [scale](float value) -> GLfloat { + float new_value = value * scale; + if (scale < 1.0f) { + const bool sign = std::signbit(value); + new_value = std::round(std::abs(new_value)); + new_value = sign ? -new_value : new_value; + } + return static_cast<GLfloat>(new_value); + }; if (dirty_viewport) { flags[Dirty::Viewports] = false; - const bool force = flags[Dirty::ViewportTransform]; + const bool force = flags[Dirty::ViewportTransform] || rescale_viewports; flags[Dirty::ViewportTransform] = false; + flags[VideoCommon::Dirty::RescaleViewports] = false; - for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { - if (!force && !flags[Dirty::Viewport0 + i]) { + for (size_t index = 0; index < Maxwell::NumViewports; ++index) { + if (!force && !flags[Dirty::Viewport0 + index]) { continue; } - flags[Dirty::Viewport0 + i] = false; + flags[Dirty::Viewport0 + index] = false; + + const auto& src = regs.viewport_transform[index]; + GLfloat x = conv(src.translate_x - src.scale_x); + GLfloat y = conv(src.translate_y - src.scale_y); + GLfloat width = conv(src.scale_x * 2.0f); + GLfloat height = conv(src.scale_y * 2.0f); - const auto& src = regs.viewport_transform[i]; - const Common::Rectangle<f32> rect{src.GetRect()}; - glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), - rect.GetHeight()); + if (height < 0) { + y += height; + height = -height; + } + glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f, + height != 0.0f ? height : 1.0f); const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; if (device.HasDepthBufferFloat()) { - glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); + glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth); } else { - glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth); } if (!GLAD_GL_NV_viewport_swizzle) { continue; } - glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x), + glViewportSwizzleNV(static_cast<GLuint>(index), + MaxwellToGL::ViewportSwizzle(src.swizzle.x), MaxwellToGL::ViewportSwizzle(src.swizzle.y), MaxwellToGL::ViewportSwizzle(src.swizzle.z), MaxwellToGL::ViewportSwizzle(src.swizzle.w)); @@ -905,14 +924,34 @@ void RasterizerOpenGL::SyncLogicOpState() { void RasterizerOpenGL::SyncScissorTest() { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::Scissors]) { + if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) { return; } flags[Dirty::Scissors] = false; + const bool force = flags[VideoCommon::Dirty::RescaleScissors]; + flags[VideoCommon::Dirty::RescaleScissors] = false; + const auto& regs = maxwell3d.regs; + + const auto& resolution = Settings::values.resolution_info; + const bool is_rescaling{texture_cache.IsRescaling()}; + const u32 up_scale = is_rescaling ? resolution.up_scale : 1U; + const u32 down_shift = is_rescaling ? resolution.down_shift : 0U; + const auto scale_up = [up_scale, down_shift](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + const u32 upset = value * up_scale; + u32 acumm{}; + if ((up_scale >> down_shift) == 0) { + acumm = upset % 2; + } + const u32 converted_value = upset >> down_shift; + return std::max<u32>(converted_value + acumm, 1U); + }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { - if (!flags[Dirty::Scissor0 + index]) { + if (!force && !flags[Dirty::Scissor0 + index]) { continue; } flags[Dirty::Scissor0 + index] = false; @@ -920,8 +959,8 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& src = regs.scissor_test[index]; if (src.enable) { glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); - glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, - src.max_x - src.min_x, src.max_y - src.min_y); + glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y), + scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y)); } else { glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); } @@ -937,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - - glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 8695c29e3..5e7101d28 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -166,7 +166,12 @@ void OGLFramebuffer::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); } void OGLFramebuffer::Release() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 02682bd76..42ef67628 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -426,16 +426,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors); } else { // VertexB path when VertexA is present. auto& program_va{programs[0]}; auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - for (const auto& desc : program_vb.info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -510,10 +508,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - u32 num_storage_buffers{}; - for (const auto& desc : program.info.storage_buffers_descriptors) { - num_storage_buffers += desc.count; - } + const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 8c3ca3d82..9e7850428 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,8 +9,8 @@ #include <glad/glad.h> +#include "common/literals.h" #include "common/settings.h" - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB; using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; struct CopyOrigin { GLint level; @@ -147,6 +148,8 @@ GLenum AttachmentType(PixelFormat format) { switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) { case SurfaceType::Depth: return GL_DEPTH_ATTACHMENT; + case SurfaceType::Stencil: + return GL_STENCIL_ATTACHMENT; case SurfaceType::DepthStencil: return GL_DEPTH_STENCIL_ATTACHMENT; default: @@ -316,6 +319,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + OGLTexture texture; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); + break; + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come from? + const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + UNREACHABLE(); + break; + default: + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + return texture; +} + [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { switch (format) { case PixelFormat::B5G6R5_UNORM: @@ -349,6 +398,10 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { UNREACHABLE_MSG("Invalid image format={}", format); return GL_R32UI; } + +[[nodiscard]] u32 NextPow2(u32 value) { + return 1U << (32U - std::countl_zero(value - 1U)); +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -359,7 +412,8 @@ ImageBufferMap::~ImageBufferMap() { TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + : device{device_}, state_tracker{state_tracker_}, + util_shaders(program_manager), resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -426,6 +480,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); + + if (resolution.active) { + for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { + rescale_draw_fbos[i].Create(); + rescale_read_fbos[i].Create(); + } + } } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -442,6 +503,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return download_buffers.RequestMap(size, false); } +u64 TextureCacheRuntime::GetDeviceLocalMemory() const { + if (GLAD_GL_NVX_gpu_memory_info) { + GLint cur_avail_mem_kb = 0; + glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); + return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; + } + return 2_GiB; // Return minimum requirements +} + void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) { const GLuint dst_name = dst_image.Handle(); @@ -458,6 +528,12 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } +void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); + format_conversion_pass.ConvertImage(dst, src, copies); +} + bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { return false; @@ -474,7 +550,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, ASSERT(src.info.type == ImageType::e3D); util_shaders.CopyBC4(dst, src, copies); } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { - bgr_copy_pass.CopyBGR(dst, src, copies); + format_conversion_pass.ConvertImage(dst, src, copies); } else { UNREACHABLE(); } @@ -605,13 +681,13 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req return found; } -Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, +Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) - : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { - if (CanBeAccelerated(runtime, info)) { + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { + if (CanBeAccelerated(*runtime, info)) { flags |= ImageFlagBits::AcceleratedUpload; } - if (IsConverted(runtime.device, info.format, info.type)) { + if (IsConverted(runtime->device, info.format, info.type)) { flags |= ImageFlagBits::Converted; gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; gl_format = GL_RGBA; @@ -622,58 +698,25 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = tuple.format; gl_type = tuple.type; } - const GLenum target = ImageTarget(info); - const GLsizei width = info.size.width; - const GLsizei height = info.size.height; - const GLsizei depth = info.size.depth; - const int max_host_mip_levels = std::bit_width(info.size.width); - const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); - const GLsizei num_layers = info.resources.layers; - const GLsizei num_samples = info.num_samples; - - GLuint handle = 0; - if (target != GL_TEXTURE_BUFFER) { - texture.Create(target); - handle = texture.handle; - } - switch (target) { - case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); - break; - case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); - break; - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { - // TODO: Where should 'fixedsamplelocations' come from? - const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); - glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, - height >> samples_y, num_layers, GL_FALSE); - break; - } - case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); - break; - case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); - break; - case GL_TEXTURE_BUFFER: - UNREACHABLE(); - break; - default: - UNREACHABLE_MSG("Invalid target=0x{:x}", target); - break; - } - if (runtime.device.HasDebuggingToolAttached()) { + texture = MakeImage(info, gl_internal_format); + current_texture = texture.handle; + if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle, - static_cast<GLsizei>(name.size()), name.data()); + glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, + texture.handle, static_cast<GLsizei>(name.size()), name.data()); } } +Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} + Image::~Image() = default; void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(true); + } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -693,12 +736,18 @@ void Image::UploadMemory(const ImageBufferMap& map, } CopyBufferToImage(copy, map.offset); } + if (is_rescaled) { + ScaleUp(); + } } void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -716,6 +765,9 @@ void Image::DownloadMemory(ImageBufferMap& map, } CopyImageToBuffer(copy, map.offset); } + if (is_rescaled) { + ScaleUp(true); + } } GLuint Image::StorageHandle() noexcept { @@ -741,11 +793,11 @@ GLuint Image::StorageHandle() noexcept { return store_view.handle; } store_view.Create(); - glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, + glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, info.resources.levels, 0, info.resources.layers); return store_view.handle; default: - return texture.handle; + return current_texture; } } @@ -849,6 +901,146 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } +void Image::Scale(bool up_scale) { + const auto format_type = GetFormatType(info.format); + const GLenum attachment = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return GL_COLOR_ATTACHMENT0; + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::Stencil: + return GL_STENCIL_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; + default: + UNREACHABLE(); + return GL_COLOR_ATTACHMENT0; + } + }(); + const GLenum mask = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return GL_COLOR_BUFFER_BIT; + case SurfaceType::Depth: + return GL_DEPTH_BUFFER_BIT; + case SurfaceType::Stencil: + return GL_STENCIL_BUFFER_BIT; + case SurfaceType::DepthStencil: + return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + default: + UNREACHABLE(); + return GL_COLOR_BUFFER_BIT; + } + }(); + const size_t fbo_index = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return 0; + case SurfaceType::Depth: + return 1; + case SurfaceType::Stencil: + return 2; + case SurfaceType::DepthStencil: + return 3; + default: + UNREACHABLE(); + return 0; + } + }(); + const bool is_2d = info.type == ImageType::e2D; + const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0}; + // Integer formats must use NEAREST filter + const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)}; + const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST; + + const auto& resolution = runtime->resolution; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + const u32 original_width = info.size.width; + const u32 original_height = info.size.height; + + if (!upscaled_backup.handle) { + auto dst_info = info; + dst_info.size.width = scaled_width; + dst_info.size.height = scaled_height; + upscaled_backup = MakeImage(dst_info, gl_internal_format); + } + const u32 src_width = up_scale ? original_width : scaled_width; + const u32 src_height = up_scale ? original_height : scaled_height; + const u32 dst_width = up_scale ? scaled_width : original_width; + const u32 dst_height = up_scale ? scaled_height : original_height; + const auto src_handle = up_scale ? texture.handle : upscaled_backup.handle; + const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle; + + // TODO (ameerj): Investigate other GL states that affect blitting. + glDisablei(GL_SCISSOR_TEST, 0); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(dst_width), + static_cast<GLfloat>(dst_height)); + + const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; + const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + for (s32 level = 0; level < info.resources.levels; ++level) { + const u32 src_level_width = std::max(1u, src_width >> level); + const u32 src_level_height = std::max(1u, src_height >> level); + const u32 dst_level_width = std::max(1u, dst_width >> level); + const u32 dst_level_height = std::max(1u, dst_height >> level); + + glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer); + + glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, + 0, dst_level_width, dst_level_height, mask, filter); + } + } + current_texture = dst_handle; + auto& state_tracker = runtime->GetStateTracker(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); +} + +bool Image::ScaleUp(bool ignore) { + if (True(flags & ImageFlagBits::Rescaled)) { + return false; + } + if (gl_format == 0 && gl_type == 0) { + // compressed textures + return false; + } + if (info.type == ImageType::Linear) { + UNREACHABLE(); + return false; + } + flags |= ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { + return false; + } + has_scaled = true; + if (ignore) { + current_texture = upscaled_backup.handle; + return true; + } + Scale(true); + return true; +} + +bool Image::ScaleDown(bool ignore) { + if (False(flags & ImageFlagBits::Rescaled)) { + return false; + } + flags &= ~ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { + return false; + } + if (ignore) { + current_texture = texture.handle; + return true; + } + Scale(false); + return true; +} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { @@ -862,7 +1054,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI flat_range = info.range; set_object_label = device.HasDebuggingToolAttached(); is_render_target = info.IsRenderTarget(); - original_texture = image.texture.handle; + original_texture = image.Handle(); num_samples = image.info.num_samples; if (!is_render_target) { swizzle[0] = info.x_source; @@ -950,9 +1142,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} +ImageView::~ImageView() = default; + GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { if (image_format == Shader::ImageFormat::Typeless) { return Handle(texture_type); @@ -1037,7 +1231,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); } else { LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); } @@ -1056,13 +1251,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of - // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared - // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with - // mismatching size, this is why core framebuffers are preferred. - GLuint handle; - glGenFramebuffers(1, &handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + framebuffer.Create(); + GLuint handle = framebuffer.handle; GLsizei num_buffers = 0; std::array<GLenum, NUM_RT> gl_draw_buffers; @@ -1082,10 +1272,20 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM } if (const ImageView* const image_view = depth_buffer; image_view) { - if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) { + switch (GetFormatType(image_view->format)) { + case SurfaceType::Depth: + buffer_bits |= GL_DEPTH_BUFFER_BIT; + break; + case SurfaceType::Stencil: + buffer_bits |= GL_STENCIL_BUFFER_BIT; + break; + case SurfaceType::DepthStencil: buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } else { + break; + default: + UNREACHABLE(); buffer_bits |= GL_DEPTH_BUFFER_BIT; + break; } const GLenum attachment = AttachmentType(image_view->format); AttachTexture(handle, attachment, image_view); @@ -1110,39 +1310,41 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM const std::string name = VideoCommon::Name(key); glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data()); } - framebuffer.handle = handle; } -void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, - std::span<const VideoCommon::ImageCopy> copies) { - static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - const u32 requested_pbo_size = - std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); +Framebuffer::~Framebuffer() = default; - if (bgr_pbo_size < requested_pbo_size) { - bgr_pbo.Create(); - bgr_pbo_size = requested_pbo_size; - glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); - } +void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + const GLenum dst_target = ImageTarget(dst_image.info); + const GLenum src_target = ImageTarget(src_image.info); + const u32 img_bpp = BytesPerBlock(src_image.info.format); for (const ImageCopy& copy : copies) { - ASSERT(copy.src_offset == zero_offset); - ASSERT(copy.dst_offset == zero_offset); - + const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target); + const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target); + const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target); + const u32 copy_size = region.width * region.height * region.depth * img_bpp; + if (pbo_size < copy_size) { + intermediate_pbo.Create(); + pbo_size = NextPow2(copy_size); + glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); + } // Copy from source to PBO glPixelStorei(GL_PACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); - glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, src_image.GlFormat(), - src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr); + glBindBuffer(GL_PIXEL_PACK_BUFFER, intermediate_pbo.handle); + glGetTextureSubImage(src_image.Handle(), src_origin.level, src_origin.x, src_origin.y, + src_origin.z, region.width, region.height, region.depth, + src_image.GlFormat(), src_image.GlType(), + static_cast<GLsizei>(pbo_size), nullptr); // Copy from PBO to destination in desired GL format glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); - glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.dst_subresource.num_layers, dst_image.GlFormat(), - dst_image.GlType(), nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, intermediate_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), dst_origin.level, dst_origin.x, dst_origin.y, + dst_origin.z, region.width, region.height, region.depth, + dst_image.GlFormat(), dst_image.GlType(), nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ca2c90be..40acc8fad 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -15,6 +15,10 @@ #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" +namespace Settings { +struct ResolutionScalingInfo; +} + namespace OpenGL { class Device; @@ -48,17 +52,17 @@ struct FormatProperties { bool is_compressed; }; -class BGRCopyPass { +class FormatConversionPass { public: - BGRCopyPass() = default; - ~BGRCopyPass() = default; + FormatConversionPass() = default; + ~FormatConversionPass() = default; - void CopyBGR(Image& dst_image, Image& src_image, - std::span<const VideoCommon::ImageCopy> copies); + void ConvertImage(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); private: - OGLBuffer bgr_pbo; - size_t bgr_pbo_size{}; + OGLBuffer intermediate_pbo; + size_t pbo_size{}; }; class TextureCacheRuntime { @@ -78,9 +82,13 @@ public: ImageBufferMap DownloadStagingBuffer(size_t size); + u64 GetDeviceLocalMemory() const; + void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + void ConvertImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); } @@ -110,6 +118,12 @@ public: bool HasNativeASTC() const noexcept; + void TickFrame() {} + + StateTracker& GetStateTracker() { + return state_tracker; + } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); @@ -132,7 +146,7 @@ private: const Device& device; StateTracker& state_tracker; UtilShaders util_shaders; - BGRCopyPass bgr_copy_pass; + FormatConversionPass format_conversion_pass; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; bool has_broken_texture_view_formats = false; @@ -149,6 +163,10 @@ private: OGLTextureView null_image_view_cube; std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; + + std::array<OGLFramebuffer, 4> rescale_draw_fbos; + std::array<OGLFramebuffer, 4> rescale_read_fbos; + const Settings::ResolutionScalingInfo& resolution; }; class Image : public VideoCommon::ImageBase { @@ -157,6 +175,7 @@ class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit Image(const VideoCommon::NullImageParams&); ~Image(); @@ -174,7 +193,7 @@ public: GLuint StorageHandle() noexcept; GLuint Handle() const noexcept { - return texture.handle; + return current_texture; } GLuint GlFormat() const noexcept { @@ -185,16 +204,25 @@ public: return gl_type; } + bool ScaleUp(bool ignore = false); + + bool ScaleDown(bool ignore = false); + private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + void Scale(bool up_scale); + OGLTexture texture; + OGLTexture upscaled_backup; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; + TextureCacheRuntime* runtime{}; + GLuint current_texture{}; }; class ImageView : public VideoCommon::ImageViewBase { @@ -206,7 +234,15 @@ public: const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); - explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); + + ~ImageView(); + + ImageView(const ImageView&) = delete; + ImageView& operator=(const ImageView&) = delete; + + ImageView(ImageView&&) = default; + ImageView& operator=(ImageView&&) = default; [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); @@ -276,6 +312,14 @@ public: explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + [[nodiscard]] GLuint Handle() const noexcept { return framebuffer.handle; } @@ -293,7 +337,8 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; - static constexpr bool HAS_DEVICE_MEMORY_INFO = false; + static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 39158aa3e..daba42ed9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -108,6 +108,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_STENCIL_INDEX8, GL_STENCIL, GL_UNSIGNED_BYTE}, // S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7d7cba69c..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,8 +21,13 @@ #include "core/memory.h" #include "core/perf_stats.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -208,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf framebuffer_crop_rect = framebuffer.crop_rect; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { + screen_info.was_accelerated = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (screen_info.was_accelerated) { return; } @@ -251,12 +258,25 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs + fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_gaussian_fragment = + CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); + present_scaleforce_fragment = + CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), + GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + present_sampler_nn.Create(); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -274,6 +294,8 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + fxaa_framebuffer.Create(); } void RendererOpenGL::AddTelemetryFields() { @@ -325,18 +347,130 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, texture.resource.Release(); texture.resource.Create(GL_TEXTURE_2D); glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); + fxaa_texture.Release(); + fxaa_texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, + Settings::values.resolution_info.ScaleUp(screen_info.texture.width), + Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); + glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle, + 0); } void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask(0); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + // Update background color before drawing glClearColor(Settings::values.bg_red.GetValue() / 255.0f, Settings::values.bg_green.GetValue() / 255.0f, Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glBindTextureUnit(0, screen_info.display_texture); + + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) { + program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); + + glEnablei(GL_SCISSOR_TEST, 0); + auto viewport_width = screen_info.texture.width; + auto scissor_width = framebuffer_crop_rect.GetWidth(); + if (scissor_width <= 0) { + scissor_width = viewport_width; + } + auto viewport_height = screen_info.texture.height; + auto scissor_height = framebuffer_crop_rect.GetHeight(); + if (scissor_height <= 0) { + scissor_height = viewport_height; + } + if (screen_info.was_accelerated) { + viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); + scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); + viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); + scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); + } + glScissorIndexed(0, 0, 0, scissor_width, scissor_height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width), + static_cast<GLfloat>(viewport_height)); + glDepthRangeIndexed(0, 0.0, 0.0); + + glBindSampler(0, present_sampler.handle); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle); + + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + + glBindTextureUnit(0, fxaa_texture.handle); + } + // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + + GLuint fragment_handle; + const auto filter = Settings::values.scaling_filter.GetValue(); + switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + fragment_handle = present_bilinear_fragment.handle; + break; + case Settings::ScalingFilter::Bilinear: + fragment_handle = present_bilinear_fragment.handle; + break; + case Settings::ScalingFilter::Bicubic: + fragment_handle = present_bicubic_fragment.handle; + break; + case Settings::ScalingFilter::Gaussian: + fragment_handle = present_gaussian_fragment.handle; + break; + case Settings::ScalingFilter::ScaleForce: + fragment_handle = present_scaleforce_fragment.handle; + break; + case Settings::ScalingFilter::Fsr: + LOG_WARNING( + Render_OpenGL, + "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); + fragment_handle = present_scaleforce_fragment.handle; + break; + default: + fragment_handle = present_bilinear_fragment.handle; + break; + } + program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); @@ -370,6 +504,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / static_cast<f32>(screen_info.texture.height); } + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && + !screen_info.was_accelerated) { + scale_u /= Settings::values.resolution_info.up_factor; + scale_v /= Settings::values.resolution_info.up_factor; + } const auto& screen = layout.screen; const std::array vertices = { @@ -380,47 +519,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { }; glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - // TODO: Signal state tracker about these changes - state_tracker.NotifyScreenDrawVertexArray(); - state_tracker.NotifyPolygonModes(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask(0); - state_tracker.NotifyBlend0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyFrontFace(); - state_tracker.NotifyCullTest(); - state_tracker.NotifyDepthTest(); - state_tracker.NotifyStencilTest(); - state_tracker.NotifyPolygonOffset(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); - state_tracker.NotifyLogicOp(); - state_tracker.NotifyClipControl(); - state_tracker.NotifyAlphaTest(); - - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { glEnable(GL_FRAMEBUFFER_SRGB); } else { glDisable(GL_FRAMEBUFFER_SRGB); } - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - glDisable(GL_POLYGON_OFFSET_FILL); - glDisable(GL_RASTERIZER_DISCARD); - glDisable(GL_ALPHA_TEST); - glDisablei(GL_BLEND, 0); glDisablei(GL_SCISSOR_TEST, 0); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); - glFrontFace(GL_CW); - glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), static_cast<GLfloat>(layout.height)); - glDepthRangeIndexed(0, 0.0, 0.0); glEnableVertexAttribArray(PositionLocation); glEnableVertexAttribArray(TexCoordLocation); @@ -440,8 +546,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); } - glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, present_sampler.handle); + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { + glBindSampler(0, present_sampler.handle); + } else { + glBindSampler(0, present_sampler_nn.handle); + } glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index d455f572f..cda333cad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -50,6 +50,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture{}; + bool was_accelerated = false; bool display_srgb{}; const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; @@ -109,9 +110,15 @@ private: // OpenGL object IDs OGLSampler present_sampler; + OGLSampler present_sampler_nn; OGLBuffer vertex_buffer; + OGLProgram fxaa_vertex; + OGLProgram fxaa_fragment; OGLProgram present_vertex; - OGLProgram present_fragment; + OGLProgram present_bilinear_fragment; + OGLProgram present_bicubic_fragment; + OGLProgram present_gaussian_fragment; + OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer @@ -119,6 +126,8 @@ private: /// Display information for Switch screen ScreenInfo screen_info; + OGLTexture fxaa_texture; + OGLFramebuffer fxaa_framebuffer; /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c1b2f063..b3884a4f5 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, BlitImageHelper::~BlitImageHelper() = default; -void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -373,9 +373,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; - const VkPipeline pipeline = FindOrEmplacePipeline(key); + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) { @@ -398,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, Tegra::Engines::Fermi2D::Operation operation) { ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); - + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = operation, + }; const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; - const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); + const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, src_stencil_view, this](vk::CommandBuffer cmdbuf) { @@ -419,40 +421,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkExtent2D extent{ - .width = src_image_view.size.width, - .height = src_image_view.size.height, + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, + this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -488,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } -VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { +VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { const auto it = std::ranges::find(blit_color_keys, key); if (it != blit_color_keys.end()) { return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; @@ -542,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke return *blit_color_pipelines.back(); } -VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { - if (blit_depth_stencil_pipeline) { - return *blit_depth_stencil_pipeline; +VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(blit_depth_stencil_keys, key); + if (it != blit_depth_stencil_keys.end()) { + return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)]; } + blit_depth_stencil_keys.push_back(key); const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); - blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ + blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -560,15 +569,15 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *two_textures_pipeline_layout, - .renderPass = renderpass, + .renderPass = key.renderpass, .subpass = 0, .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0, - }); - return *blit_depth_stencil_pipeline; + })); + return *blit_depth_stencil_pipelines.back(); } void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 33ee095c1..d77f76678 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -34,7 +34,7 @@ public: StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); - void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); @@ -44,21 +44,25 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view); + const ImageView& src_image_view, u32 up_scale, u32 down_shift); - [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); - [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); + [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); @@ -84,7 +88,8 @@ private: std::vector<BlitImagePipelineKey> blit_color_keys; std::vector<vk::Pipeline> blit_color_pipelines; - vk::Pipeline blit_depth_stencil_pipeline; + std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; + std::vector<vk::Pipeline> blit_depth_stencil_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 68a23b602..31adada56 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -208,6 +208,9 @@ struct FormatTuple { {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM + // Stencil formats + {VK_FORMAT_S8_UINT, Attachable}, // S8_UINT + // DepthStencil formats {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 4847db6b6..11c160570 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -10,6 +10,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" @@ -20,6 +21,8 @@ namespace Vulkan { +using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; + class DescriptorLayoutBuilder { public: DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} @@ -68,18 +71,28 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + using Shader::Backend::SPIRV::RescalingLayout; + const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u; + const VkPushConstantRange range{ + .stageFlags = static_cast<VkShaderStageFlags>( + is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), + .offset = 0, + .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset, + }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, .setLayoutCount = descriptor_set_layout ? 1U : 0U, .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &range, }); } void Add(const Shader::Info& info, VkShaderStageFlags stage) { + is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0; + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); @@ -115,6 +128,7 @@ private: } const Device* device{}; + bool is_compute{}; boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; u32 binding{}; @@ -122,31 +136,68 @@ private: size_t offset{}; }; -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, - const ImageId*& image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue) { - for (const auto& desc : info.texture_buffer_descriptors) { - image_view_ids += desc.count; +class RescalingPushConstant { +public: + explicit RescalingPushConstant() noexcept {} + + void PushTexture(bool is_rescaled) noexcept { + *texture_ptr |= is_rescaled ? texture_bit : 0u; + texture_bit <<= 1u; + if (texture_bit == 0u) { + texture_bit = 1u; + ++texture_ptr; + } } - for (const auto& desc : info.image_buffer_descriptors) { - image_view_ids += desc.count; + + void PushImage(bool is_rescaled) noexcept { + *image_ptr |= is_rescaled ? image_bit : 0u; + image_bit <<= 1u; + if (image_bit == 0u) { + image_bit = 1u; + ++image_ptr; + } } + + const std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS>& Data() const noexcept { + return words; + } + +private: + std::array<u32, NUM_TEXTURE_AND_IMAGE_SCALING_WORDS> words{}; + u32* texture_ptr{words.data()}; + u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS}; + u32 texture_bit{1u}; + u32 image_bit{1u}; +}; + +inline void PushImageDescriptors(TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, RescalingPushConstant& rescaling, + const VkSampler*& samplers, + const VideoCommon::ImageViewInOut*& views) { + const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); + const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); + views += num_texture_buffers; + views += num_image_buffers; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { + const VideoCommon::ImageViewId image_view_id{(views++)->id}; const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + ImageView& image_view{texture_cache.GetImageView((views++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; update_descriptor_queue.AddImage(vk_image_view); + rescaling.PushImage(texture_cache.IsRescaling(image_view)); } } } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 888bc7392..1e447e621 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -12,14 +12,22 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" +#include "video_core/host_shaders/fxaa_frag_spv.h" +#include "video_core/host_shaders/fxaa_vert_spv.h" +#include "video_core/host_shaders/present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -144,8 +152,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - UpdateDescriptorSet(image_index, - use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); + VkImageView source_image_view = + use_accelerated ? screen_info.image_view : *raw_image_views[image_index]; BufferData data; SetUniformData(data, layout); @@ -222,9 +230,134 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, read_barrier); cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, write_barrier); }); } + + const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); + if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) { + UpdateAADescriptorSet(image_index, source_image_view, false); + const u32 up_scale = Settings::values.resolution_info.up_scale; + const u32 down_shift = Settings::values.resolution_info.down_shift; + VkExtent2D size{ + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, + }; + scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = {}, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + { + VkImageMemoryBarrier fsr_write_barrier = base_barrier; + fsr_write_barrier.image = *aa_image; + fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier); + } + + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; + const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; + const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; + const VkClearValue clear_color{ + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = *aa_renderpass, + .framebuffer = *aa_framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast<float>(size.width), + .height = static_cast<float>(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + switch (anti_alias_pass) { + case Settings::AntiAliasing::Fxaa: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); + break; + default: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); + break; + } + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, + aa_descriptor_sets[image_index], {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); + + { + VkImageMemoryBarrier blit_read_barrier = base_barrier; + blit_read_barrier.image = *aa_image; + blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); + } + }); + source_image_view = *aa_image_view; + } + + if (fsr) { + auto crop_rect = framebuffer.crop_rect; + if (crop_rect.GetWidth() == 0) { + crop_rect.right = framebuffer.width; + } + if (crop_rect.GetHeight() == 0) { + crop_rect.bottom = framebuffer.height; + } + crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + VkExtent2D fsr_input_size{ + .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), + .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), + }; + VkImageView fsr_image_view = + fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); + UpdateDescriptorSet(image_index, fsr_image_view, true); + } else { + const bool is_nn = + Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; + UpdateDescriptorSet(image_index, source_image_view, is_nn); + } + scheduler.Record( [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; @@ -258,8 +391,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .offset = {0, 0}, .extent = size, }; + const auto filter = Settings::values.scaling_filter.GetValue(); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; + case Settings::ScalingFilter::Bilinear: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; + case Settings::ScalingFilter::Bicubic: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); + break; + case Settings::ScalingFilter::Gaussian: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline); + break; + case Settings::ScalingFilter::ScaleForce: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); + break; + default: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; + } cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -281,11 +434,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb } vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { + return CreateFramebuffer(image_view, extent, renderpass); +} + +vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, + vk::RenderPass& rd) { return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .renderPass = *renderpass, + .renderPass = *rd, .attachmentCount = 1, .pAttachments = &image_view, .width = extent.width, @@ -308,9 +466,21 @@ void VKBlitScreen::CreateDynamicResources() { CreateRenderPass(); CreateFramebuffers(); CreateGraphicsPipeline(); + fsr.reset(); + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + CreateFSR(); + } } void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr) { + CreateFSR(); + } + } else { + fsr.reset(); + } + if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { return; } @@ -324,7 +494,16 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) void VKBlitScreen::CreateShaders() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); - fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); + fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV); + fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV); + bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); + bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); + gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); + if (device.IsFloat16Supported()) { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); + } else { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + } } void VKBlitScreen::CreateSemaphores() { @@ -344,6 +523,13 @@ void VKBlitScreen::CreateDescriptorPool() { }, }}; + const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{ + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast<u32>(image_count * 2), + }, + }}; + const VkDescriptorPoolCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, @@ -353,19 +539,33 @@ void VKBlitScreen::CreateDescriptorPool() { .pPoolSizes = pool_sizes.data(), }; descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); + + const VkDescriptorPoolCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast<u32>(image_count), + .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()), + .pPoolSizes = pool_sizes_aa.data(), + }; + aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa); } void VKBlitScreen::CreateRenderPass() { + renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); +} + +vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { const VkAttachmentDescription color_attachment{ .flags = 0, - .format = swapchain.GetImageViewFormat(), + .format = format, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, }; const VkAttachmentReference color_attachment_ref{ @@ -408,7 +608,7 @@ void VKBlitScreen::CreateRenderPass() { .pDependencies = &dependency, }; - renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); + return device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { @@ -429,6 +629,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() { }, }}; + const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, + }, + }}; + const VkDescriptorSetLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -437,11 +654,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() { .pBindings = layout_bindings.data(), }; + const VkDescriptorSetLayoutCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(layout_bindings_aa.size()), + .pBindings = layout_bindings_aa.data(), + }; + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); + aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa); } void VKBlitScreen::CreateDescriptorSets() { const std::vector layouts(image_count, *descriptor_set_layout); + const std::vector layouts_aa(image_count, *aa_descriptor_set_layout); const VkDescriptorSetAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, @@ -451,7 +678,16 @@ void VKBlitScreen::CreateDescriptorSets() { .pSetLayouts = layouts.data(), }; + const VkDescriptorSetAllocateInfo ai_aa{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *aa_descriptor_pool, + .descriptorSetCount = static_cast<u32>(image_count), + .pSetLayouts = layouts_aa.data(), + }; + descriptor_sets = descriptor_pool.Allocate(ai); + aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa); } void VKBlitScreen::CreatePipelineLayout() { @@ -464,11 +700,63 @@ void VKBlitScreen::CreatePipelineLayout() { .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }; + const VkPipelineLayoutCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = aa_descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); + aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa); } void VKBlitScreen::CreateGraphicsPipeline() { - const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ + const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *bilinear_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *bicubic_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, @@ -483,7 +771,28 @@ void VKBlitScreen::CreateGraphicsPipeline() { .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *fragment_shader, + .module = *gaussian_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *scaleforce_fragment_shader, .pName = "main", .pSpecializationInfo = nullptr, }, @@ -583,12 +892,12 @@ void VKBlitScreen::CreateGraphicsPipeline() { .pDynamicStates = dynamic_states.data(), }; - const VkGraphicsPipelineCreateInfo pipeline_ci{ + const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stageCount = static_cast<u32>(shader_stages.size()), - .pStages = shader_stages.data(), + .stageCount = static_cast<u32>(bilinear_shader_stages.size()), + .pStages = bilinear_shader_stages.data(), .pVertexInputState = &vertex_input_ci, .pInputAssemblyState = &input_assembly_ci, .pTessellationState = nullptr, @@ -605,7 +914,76 @@ void VKBlitScreen::CreateGraphicsPipeline() { .basePipelineIndex = 0, }; - pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); + const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(bicubic_shader_stages.size()), + .pStages = bicubic_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(gaussian_shader_stages.size()), + .pStages = gaussian_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(scaleforce_shader_stages.size()), + .pStages = scaleforce_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); + bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); + gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci); + scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); } void VKBlitScreen::CreateSampler() { @@ -614,8 +992,29 @@ void VKBlitScreen::CreateSampler() { .pNext = nullptr, .flags = 0, .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + const VkSamplerCreateInfo ci_nn{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_NEAREST, .minFilter = VK_FILTER_NEAREST, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, @@ -631,6 +1030,7 @@ void VKBlitScreen::CreateSampler() { }; sampler = device.GetLogical().CreateSampler(ci); + nn_sampler = device.GetLogical().CreateSampler(ci_nn); } void VKBlitScreen::CreateFramebuffers() { @@ -639,7 +1039,7 @@ void VKBlitScreen::CreateFramebuffers() { for (std::size_t i = 0; i < image_count; ++i) { const VkImageView image_view{swapchain.GetImageViewIndex(i)}; - framebuffers[i] = CreateFramebuffer(image_view, size); + framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); } } @@ -649,6 +1049,11 @@ void VKBlitScreen::ReleaseRawImages() { } raw_images.clear(); raw_buffer_commits.clear(); + + aa_image_view.reset(); + aa_image.reset(); + aa_commit = MemoryCommit{}; + buffer.reset(); buffer_commit = MemoryCommit{}; } @@ -675,8 +1080,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) raw_image_views.resize(image_count); raw_buffer_commits.resize(image_count); - for (size_t i = 0; i < image_count; ++i) { - raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, + u32 down_shift = 0) { + u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + : VK_IMAGE_USAGE_TRANSFER_DST_BIT; + return device.GetLogical().CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -684,26 +1092,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) .format = GetFormat(framebuffer), .extent = { - .width = framebuffer.width, - .height = framebuffer.height, + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, .depth = 1, }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal); - raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + }; + const auto create_commit = [&](vk::Image& image) { + return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); + }; + const auto create_image_view = [&](vk::Image& image) { + return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, - .image = *raw_images[i], + .image = *image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = GetFormat(framebuffer), .components = @@ -722,10 +1134,211 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) .layerCount = 1, }, }); + }; + + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = create_image(); + raw_buffer_commits[i] = create_commit(raw_images[i]); + raw_image_views[i] = create_image_view(raw_images[i]); } + + // AA Resources + const u32 up_scale = Settings::values.resolution_info.up_scale; + const u32 down_shift = Settings::values.resolution_info.down_shift; + aa_image = create_image(true, up_scale, down_shift); + aa_commit = create_commit(aa_image); + aa_image_view = create_image_view(aa_image); + VkExtent2D size{ + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, + }; + if (aa_renderpass) { + aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); + return; + } + aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); + aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); + + const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *fxaa_vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *fxaa_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const auto vertex_binding_description = ScreenRectVertex::GetDescription(); + const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); + + const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &vertex_binding_description, + .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, + .pVertexAttributeDescriptions = vertex_attrs_description.data(), + }; + + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = VK_FALSE, + }; + + const VkPipelineViewportStateCreateInfo viewport_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, + }; + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisampling_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineColorBlendAttachmentState color_blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + + static constexpr std::array dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(fxaa_shader_stages.size()), + .pStages = fxaa_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *aa_pipeline_layout, + .renderPass = *aa_renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + // AA + aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); } -void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { +void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, + bool nn) const { + const VkDescriptorImageInfo image_info{ + .sampler = nn ? *nn_sampler : *sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = aa_descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + const VkWriteDescriptorSet sampler_write_2{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = aa_descriptor_sets[image_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); +} + +void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, + bool nn) const { const VkDescriptorBufferInfo buffer_info{ .buffer = *buffer, .offset = offsetof(BufferData, uniform), @@ -746,7 +1359,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag }; const VkDescriptorImageInfo image_info{ - .sampler = *sampler, + .sampler = nn ? *nn_sampler : *sampler, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -798,17 +1411,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0); UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. f32 scale_u = 1.0f; f32 scale_v = 1.0f; - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / - static_cast<f32>(screen_info.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / - static_cast<f32>(screen_info.height); + // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering + // (e.g. handheld mode) on a 1920x1080 framebuffer. + if (!fsr) { + if (framebuffer_crop_rect.GetWidth() > 0) { + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.width); + } + if (framebuffer_crop_rect.GetHeight() > 0) { + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.height); + } } const auto& screen = layout.screen; @@ -822,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v); } +void VKBlitScreen::CreateFSR() { + const auto& layout = render_window.GetFramebufferLayout(); + const VkExtent2D fsr_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), + }; + fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size); +} + u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 430bcfbca..bbca71af3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -34,6 +34,7 @@ namespace Vulkan { struct ScreenInfo; class Device; +class FSR; class RasterizerVulkan; class VKScheduler; class VKSwapchain; @@ -66,6 +67,9 @@ public: [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent); + [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, + VkExtent2D extent, vk::RenderPass& rd); + private: struct BufferData; @@ -74,6 +78,7 @@ private: void CreateSemaphores(); void CreateDescriptorPool(); void CreateRenderPass(); + vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); void CreateDescriptorSetLayout(); void CreateDescriptorSets(); void CreatePipelineLayout(); @@ -88,11 +93,14 @@ private: void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; + void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const; + void CreateFSR(); + u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, std::size_t image_index) const; @@ -107,14 +115,24 @@ private: const VKScreenInfo& screen_info; vk::ShaderModule vertex_shader; - vk::ShaderModule fragment_shader; + vk::ShaderModule fxaa_vertex_shader; + vk::ShaderModule fxaa_fragment_shader; + vk::ShaderModule bilinear_fragment_shader; + vk::ShaderModule bicubic_fragment_shader; + vk::ShaderModule gaussian_fragment_shader; + vk::ShaderModule scaleforce_fragment_shader; vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; - vk::Pipeline pipeline; + vk::Pipeline nearest_neightbor_pipeline; + vk::Pipeline bilinear_pipeline; + vk::Pipeline bicubic_pipeline; + vk::Pipeline gaussian_pipeline; + vk::Pipeline scaleforce_pipeline; vk::RenderPass renderpass; std::vector<vk::Framebuffer> framebuffers; vk::DescriptorSets descriptor_sets; + vk::Sampler nn_sampler; vk::Sampler sampler; vk::Buffer buffer; @@ -126,8 +144,22 @@ private: std::vector<vk::Image> raw_images; std::vector<vk::ImageView> raw_image_views; std::vector<MemoryCommit> raw_buffer_commits; + + vk::DescriptorPool aa_descriptor_pool; + vk::DescriptorSetLayout aa_descriptor_set_layout; + vk::PipelineLayout aa_pipeline_layout; + vk::Pipeline aa_pipeline; + vk::RenderPass aa_renderpass; + vk::Framebuffer aa_framebuffer; + vk::DescriptorSets aa_descriptor_sets; + vk::Image aa_image; + vk::ImageView aa_image_view; + MemoryCommit aa_commit; + u32 raw_width = 0; u32 raw_height = 0; + + std::unique_ptr<FSR> fsr; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8ac58bc2f..5ffd93499 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() { } void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, - std::span<const VideoCommon::BufferCopy> copies) { + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { + scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); + } + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); + } + }); +} + +void BufferCacheRuntime::PreCopyBarrier() { + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); +} + +void BufferCacheRuntime::PostCopyBarrier() { + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); }); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index c27402ff0..1ee0d8420 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -69,8 +69,12 @@ public: [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + void PreCopyBarrier(); + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - std::span<const VideoCommon::BufferCopy> copies); + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void PostCopyBarrier(); void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 44faf626a..de36bcdb7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -22,6 +22,7 @@ namespace Vulkan { using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, @@ -108,8 +109,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; - std::array<ImageId, max_elements> image_view_ids; - boost::container::static_vector<u32, max_elements> image_view_indices; + boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; boost::container::static_vector<VkSampler, max_elements> samplers; const auto& qmd{kepler_compute.launch_description}; @@ -134,30 +134,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({ + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }); } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc, false); + } + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc, false); + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); + for (const auto& desc : info.image_descriptors) { + add_image(desc, desc.is_written); + } + texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); buffer_cache.UnbindComputeTextureBuffers(); - ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; @@ -166,11 +173,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + ImageView& image_view = texture_cache.GetImageView(views[index].id); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); - ++texture_buffer_ids; ++index; } }}; @@ -180,9 +186,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); + RescalingPushConstant rescaling; const VkSampler* samplers_it{samplers.data()}; - const ImageId* views_it{image_view_ids.data()}; - PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); + const VideoCommon::ImageViewInOut* views_it{views.data()}; + PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it, + views_it); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built @@ -192,11 +200,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, }); } const void* const descriptor_data{update_descriptor_queue.UpdateData()}; - scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { + const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); + scheduler.Record([this, descriptor_data, is_rescaling, + rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); if (!descriptor_set_layout) { return; } + if (is_rescaling) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; const vk::Device& dev{device.GetLogical()}; dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp new file mode 100644 index 000000000..73629d229 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -0,0 +1,553 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cmath> +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "common/div_ceil.h" + +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" +#include "video_core/renderer_vulkan/vk_fsr.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. +u32 AU1_AH1_AF1(f32 f) { + static constexpr u32 base[512]{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, + 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, + 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + }; + static constexpr s8 shift[512]{ + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, + }; + const u32 u = Common::BitCast<u32>(f); + const u32 i = u >> 23; + return base[i] + ((u & 0x7fffff) >> shift[i]); +} + +u32 AU1_AH2_AF2(f32 a[2]) { + return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); +} + +void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, + f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, + f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { + con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); + con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); + con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); + con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); + con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); + con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); + con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); + con3[2] = con3[3] = 0; +} + +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { + FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, + inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + + inputOffsetInPixelsX); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + + inputOffsetInPixelsY); +} + +void FsrRcasCon(u32* con, f32 sharpness) { + sharpness = std::exp2f(-sharpness); + f32 hSharp[2]{sharpness, sharpness}; + con[0] = Common::BitCast<u32>(sharpness); + con[1] = AU1_AH2_AF2(hSharp); + con[2] = 0; + con[3] = 0; +} +} // Anonymous namespace + +FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, + VkExtent2D output_size_) + : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_}, + output_size{output_size_} { + + CreateImages(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayout(); + CreateDescriptorSets(); + CreatePipelineLayout(); + CreatePipeline(); +} + +VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, + VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { + + UpdateDescriptorSet(image_index, image_view); + + scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = {}, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); + + std::array<u32, 4 * 4> push_constants; + FsrEasuConOffset( + push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, + push_constants.data() + 12, + + static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), + static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height), + static_cast<f32>(output_size.width), static_cast<f32>(output_size.height), + static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); + + { + VkImageMemoryBarrier fsr_write_barrier = base_barrier; + fsr_write_barrier.image = *images[image_index], + fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier); + } + + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_sets[image_index * 2], {}); + cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), + Common::DivCeil(output_size.height, 16u), 1); + + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); + + FsrRcasCon(push_constants.data(), 0.25f); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); + + { + std::array<VkImageMemoryBarrier, 2> barriers; + auto& fsr_read_barrier = barriers[0]; + auto& blit_write_barrier = barriers[1]; + + fsr_read_barrier = base_barrier; + fsr_read_barrier.image = *images[image_index]; + fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + blit_write_barrier = base_barrier; + blit_write_barrier.image = *images[image_count + image_index]; + blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers); + } + + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_sets[image_index * 2 + 1], {}); + cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), + Common::DivCeil(output_size.height, 16u), 1); + + { + std::array<VkImageMemoryBarrier, 1> barriers; + auto& blit_read_barrier = barriers[0]; + + blit_read_barrier = base_barrier; + blit_read_barrier.image = *images[image_count + image_index]; + blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers); + } + }); + + return *image_views[image_count + image_index]; +} + +void FSR::CreateDescriptorPool() { + const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast<u32>(image_count * 2), + }, + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = static_cast<u32>(image_count * 2), + }, + }}; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast<u32>(image_count * 2), + .poolSizeCount = static_cast<u32>(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); +} + +void FSR::CreateDescriptorSetLayout() { + const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = sampler.address(), + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = sampler.address(), + }, + }}; + + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(layout_bindings.size()), + .pBindings = layout_bindings.data(), + }; + + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); +} + +void FSR::CreateDescriptorSets() { + const u32 sets = static_cast<u32>(image_count * 2); + const std::vector layouts(sets, *descriptor_set_layout); + + const VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = sets, + .pSetLayouts = layouts.data(), + }; + + descriptor_sets = descriptor_pool.Allocate(ai); +} + +void FSR::CreateImages() { + images.resize(image_count * 2); + image_views.resize(image_count * 2); + buffer_commits.resize(image_count * 2); + + for (size_t i = 0; i < image_count * 2; ++i) { + images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R16G16B16A16_SFLOAT, + .extent = + { + .width = output_size.width, + .height = output_size.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); + image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *images[i], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R16G16B16A16_SFLOAT, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + } +} + +void FSR::CreatePipelineLayout() { + VkPushConstantRange push_const{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = sizeof(std::array<u32, 4 * 4>), + }; + VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_const, + }; + + pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); +} + +void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { + const auto fsr_image_view = *image_views[image_index]; + const auto blit_image_view = *image_views[image_count + image_index]; + + const VkDescriptorImageInfo image_info{ + .sampler = VK_NULL_HANDLE, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo fsr_image_info{ + .sampler = VK_NULL_HANDLE, + .imageView = fsr_image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo blit_image_info{ + .sampler = VK_NULL_HANDLE, + .imageView = blit_image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index * 2], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + VkWriteDescriptorSet output_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index * 2], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &fsr_image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); + + sampler_write.dstSet = descriptor_sets[image_index * 2 + 1]; + sampler_write.pImageInfo = &fsr_image_info; + output_write.dstSet = descriptor_sets[image_index * 2 + 1]; + output_write.pImageInfo = &blit_image_info; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); +} + +void FSR::CreateSampler() { + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + sampler = device.GetLogical().CreateSampler(ci); +} + +void FSR::CreateShaders() { + if (device.IsFloat16Supported()) { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); + } else { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); + } +} + +void FSR::CreatePipeline() { + VkPipelineShaderStageCreateInfo shader_stage_easu{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *easu_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + VkPipelineShaderStageCreateInfo shader_stage_rcas{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *rcas_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + VkComputePipelineCreateInfo pipeline_ci_easu{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage_easu, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }; + + VkComputePipelineCreateInfo pipeline_ci_rcas{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage_rcas, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }; + + easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu); + rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h new file mode 100644 index 000000000..6bbec3d36 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/math_util.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class VKScheduler; + +class FSR { +public: + explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D output_size); + VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, + VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); + +private: + void CreateDescriptorPool(); + void CreateDescriptorSetLayout(); + void CreateDescriptorSets(); + void CreateImages(); + void CreateSampler(); + void CreateShaders(); + void CreatePipeline(); + void CreatePipelineLayout(); + + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; + + const Device& device; + MemoryAllocator& memory_allocator; + size_t image_count; + VkExtent2D output_size; + + vk::DescriptorPool descriptor_pool; + vk::DescriptorSetLayout descriptor_set_layout; + vk::DescriptorSets descriptor_sets; + vk::PipelineLayout pipeline_layout; + vk::ShaderModule easu_shader; + vk::ShaderModule rcas_shader; + vk::Pipeline easu_pipeline; + vk::Pipeline rcas_pipeline; + vk::Sampler sampler; + std::vector<vk::Image> images; + std::vector<vk::ImageView> image_views; + std::vector<MemoryCommit> buffer_commits; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8634c3316..616a7b457 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -32,6 +32,8 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; @@ -235,6 +237,7 @@ GraphicsPipeline::GraphicsPipeline( stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + num_textures += Shader::NumDescriptors(info->texture_descriptors); } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -277,11 +280,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; - std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; + std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; size_t sampler_index{}; - size_t image_index{}; + size_t view_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -322,26 +324,30 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.first; + views[view_index++] = { + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }; } }}; if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } if constexpr (Spec::has_image_buffers) { for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.first; + views[view_index++] = {handle.first}; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); @@ -349,7 +355,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc); + add_image(desc, desc.is_written); } } }}; @@ -368,10 +374,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - const std::span indices_span(image_view_indices.data(), image_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index)); - ImageId* texture_buffer_index{image_view_ids.data()}; + VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { @@ -381,12 +386,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++index; - ++texture_buffer_index; + ++texture_buffer_it; } }}; buffer_cache.UnbindGraphicsTextureBuffers(stage); @@ -402,13 +407,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -432,12 +433,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { update_descriptor_queue.Acquire(); + RescalingPushConstant rescaling; const VkSampler* samplers_it{samplers.data()}; - const ImageId* views_it{image_view_ids.data()}; + const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, - update_descriptor_queue); + PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, + samplers_it, views_it); }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); @@ -454,10 +456,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } - ConfigureDraw(); + ConfigureDraw(rescaling); } -void GraphicsPipeline::ConfigureDraw() { +void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -468,12 +470,25 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } + const bool is_rescaling{texture_cache.IsRescaling()}; + const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; - scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), + is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); + if (update_rescaling) { + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), + &scale_down_factor); + } if (!descriptor_set_layout) { return; } @@ -826,18 +841,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { void GraphicsPipeline::Validate() { size_t num_images{}; for (const auto& info : stage_infos) { - for (const auto& desc : info.texture_buffer_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + num_images += Shader::NumDescriptors(info.texture_buffer_descriptors); + num_images += Shader::NumDescriptors(info.image_buffer_descriptors); + num_images += Shader::NumDescriptors(info.texture_descriptors); + num_images += Shader::NumDescriptors(info.image_descriptors); } ASSERT(num_images <= MAX_IMAGE_ELEMENTS); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1c780e944..a0c1d8f07 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -62,6 +62,7 @@ namespace Vulkan { class Device; class PipelineStatistics; class RenderPassCache; +class RescalingPushConstant; class VKScheduler; class VKUpdateDescriptorQueue; @@ -113,7 +114,7 @@ private: template <typename Spec> void ConfigureImpl(bool is_indexed); - void ConfigureDraw(); + void ConfigureDraw(const RescalingPushConstant& rescaling); void MakePipeline(VkRenderPass render_pass); @@ -138,6 +139,7 @@ private: std::array<Shader::Info, NUM_STAGES> stage_infos; std::array<u32, 5> enabled_uniform_buffer_masks{}; VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + u32 num_textures{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0886b7da8..9be9c9bed 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -70,7 +70,9 @@ public: return; } // If none of the above is hit, fallback to a regular wait - semaphore.Wait(tick); + while (!semaphore.Wait(tick)) { + } + Refresh(); } private: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 30b47a7a0..fd334a146 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,18 +58,28 @@ struct DrawParams { bool is_indexed; }; -VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { +VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; - const float width = src.scale_x * 2.0f; - float y = src.translate_y - src.scale_y; - float height = src.scale_y * 2.0f; + const auto conv = [scale](float value) { + float new_value = value * scale; + if (scale < 1.0f) { + const bool sign = std::signbit(value); + new_value = std::round(std::abs(new_value)); + new_value = sign ? -new_value : new_value; + } + return new_value; + }; + const float x = conv(src.translate_x - src.scale_x); + const float width = conv(src.scale_x * 2.0f); + float y = conv(src.translate_y - src.scale_y); + float height = conv(src.scale_y * 2.0f); if (regs.screen_y_control.y_negate) { y += height; height = -height; } const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; VkViewport viewport{ - .x = src.translate_x - src.scale_x, + .x = x, .y = y, .width = width != 0.0f ? width : 1.0f, .height = height != 0.0f ? height : 1.0f, @@ -83,14 +93,27 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in return viewport; } -VkRect2D GetScissorState(const Maxwell& regs, size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; + const auto scale_up = [&](s32 value) -> s32 { + if (value == 0) { + return 0U; + } + const s32 upset = value * up_scale; + s32 acumm = 0; + if ((up_scale >> down_shift) == 0) { + acumm = upset % 2; + } + const s32 converted_value = (value * up_scale) >> down_shift; + return value < 0 ? std::min<s32>(converted_value - acumm, -1) + : std::max<s32>(converted_value + acumm, 1); + }; if (src.enable) { - scissor.offset.x = static_cast<s32>(src.min_x); - scissor.offset.y = static_cast<s32>(src.min_y); - scissor.extent.width = src.max_x - src.min_x; - scissor.extent.height = src.max_y - src.min_y; + scissor.offset.x = scale_up(static_cast<s32>(src.min_x)); + scissor.offset.y = scale_up(static_cast<s32>(src.min_y)); + scissor.extent.width = scale_up(src.max_x - src.min_x); + scissor.extent.height = scale_up(src.max_y - src.min_y); } else { scissor.offset.x = 0; scissor.offset.y = 0; @@ -199,7 +222,7 @@ void RasterizerVulkan::Clear() { query_cache.UpdateCounters(); - const auto& regs = maxwell3d.regs; + auto& regs = maxwell3d.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -214,8 +237,16 @@ void RasterizerVulkan::Clear() { const VkExtent2D render_area = framebuffer->RenderArea(); scheduler.RequestRenderpass(framebuffer); + u32 up_scale = 1; + u32 down_shift = 0; + if (texture_cache.IsRescaling()) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } + UpdateViewportsState(regs); + VkClearRect clear_rect{ - .rect = GetScissorState(regs, 0), + .rect = GetScissorState(regs, 0, up_scale, down_shift), .baseArrayLayer = regs.clear_buffers.layer, .layerCount = 1, }; @@ -230,7 +261,38 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { VkClearValue clear_value; - std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + bool is_integer = false; + bool is_signed = false; + size_t int_size = 8; + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) { + const auto& this_rt = regs.rt[i]; + if (this_rt.Address() == 0) { + continue; + } + if (this_rt.format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto format = + VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format); + is_integer = IsPixelFormatInteger(format); + is_signed = IsPixelFormatSignedInteger(format); + int_size = PixelComponentSizeBitsInteger(format); + break; + } + if (!is_integer) { + std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + } else if (!is_signed) { + for (size_t i = 0; i < 4; i++) { + clear_value.color.uint32[i] = static_cast<u32>( + static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]); + } + } else { + for (size_t i = 0; i < 4; i++) { + clear_value.color.int32[i] = + static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) * + (regs.clear_color[i] - 0.5f)); + } + } scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { const VkClearAttachment attachment{ @@ -595,15 +657,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg if (!state_tracker.TouchViewports()) { return; } + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; const std::array viewports{ - GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), - GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), - GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), - GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), - GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), - GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), - GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), - GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), + GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), + GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), + GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), + GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale), + GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale), + GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale), + GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), + GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), }; scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); } @@ -612,13 +676,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } + u32 up_scale = 1; + u32 down_shift = 0; + if (texture_cache.IsRescaling()) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } const std::array scissors{ - GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), - GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), - GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), - GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), - GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), - GetScissorState(regs, 15), + GetScissorState(regs, 0, up_scale, down_shift), + GetScissorState(regs, 1, up_scale, down_shift), + GetScissorState(regs, 2, up_scale, down_shift), + GetScissorState(regs, 3, up_scale, down_shift), + GetScissorState(regs, 4, up_scale, down_shift), + GetScissorState(regs, 5, up_scale, down_shift), + GetScissorState(regs, 6, up_scale, down_shift), + GetScissorState(regs, 7, up_scale, down_shift), + GetScissorState(regs, 8, up_scale, down_shift), + GetScissorState(regs, 9, up_scale, down_shift), + GetScissorState(regs, 10, up_scale, down_shift), + GetScissorState(regs, 11, up_scale, down_shift), + GetScissorState(regs, 12, up_scale, down_shift), + GetScissorState(regs, 13, up_scale, down_shift), + GetScissorState(regs, 14, up_scale, down_shift), + GetScissorState(regs, 15, up_scale, down_shift), }; scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 0c11c814f..3bfdf41ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { return true; } +bool VKScheduler::UpdateRescaling(bool is_rescaling) { + if (state.rescaling_defined && is_rescaling == state.is_rescaling) { + return false; + } + state.rescaling_defined = true; + state.is_rescaling = is_rescaling; + return true; +} + void VKScheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { @@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() { void VKScheduler::InvalidateState() { state.graphics_pipeline = nullptr; + state.rescaling_defined = false; state_tracker.InvalidateCommandBufferState(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 85fc1712f..1b06c9296 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -56,6 +56,9 @@ public: /// Update the pipeline to the current execution context. bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); + /// Update the rescaling state. Returns true if the state has to be updated. + bool UpdateRescaling(bool is_rescaling); + /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -185,6 +188,8 @@ private: VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; GraphicsPipeline* graphics_pipeline = nullptr; + bool is_rescaling = false; + bool rescaling_defined = false; }; void WorkerThread(std::stop_token stop_token); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 2f2d6b31f..40a149832 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -71,11 +71,15 @@ public: } bool TouchViewports() { - return Exchange(Dirty::Viewports, false); + const bool dirty_viewports = Exchange(Dirty::Viewports, false); + const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false); + return dirty_viewports || rescale_viewports; } bool TouchScissors() { - return Exchange(Dirty::Scissors, false); + const bool dirty_scissors = Exchange(Dirty::Scissors, false); + const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false); + return dirty_scissors || rescale_scissors; } bool TouchDepthBias() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 06c5fb867..9bc846b94 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -32,10 +32,12 @@ using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using VideoCommon::BufferImageCopy; +using VideoCommon::ImageFlagBits; using VideoCommon::ImageInfo; using VideoCommon::ImageType; using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatInteger; namespace { constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { @@ -100,6 +102,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; break; case VideoCore::Surface::SurfaceType::Depth: + case VideoCore::Surface::SurfaceType::Stencil: case VideoCore::Surface::SurfaceType::DepthStencil: usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; break; @@ -171,6 +174,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return VK_IMAGE_ASPECT_COLOR_BIT; case VideoCore::Surface::SurfaceType::Depth: return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::Stencil: + return VK_IMAGE_ASPECT_STENCIL_BIT; case VideoCore::Surface::SurfaceType::DepthStencil: return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; default: @@ -193,6 +198,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { case PixelFormat::D16_UNORM: case PixelFormat::D32_FLOAT: return VK_IMAGE_ASPECT_DEPTH_BIT; + case PixelFormat::S8_UINT: + return VK_IMAGE_ASPECT_STENCIL_BIT; default: return VK_IMAGE_ASPECT_COLOR_BIT; } @@ -588,8 +595,158 @@ struct RangedBarrierRange { UNREACHABLE_MSG("Invalid image format={}", format); return VK_FORMAT_R32_UINT; } + +void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, + bool up_scaling = true) { + const bool is_2d = info.type == ImageType::e2D; + const auto resources = info.resources; + const VkExtent2D extent{ + .width = info.size.width, + .height = info.size.height, + }; + // Depth and integer formats must use NEAREST filter for blits. + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; + const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, + vk_filter, up_scaling](vk::CommandBuffer cmdbuf) { + const VkOffset2D src_size{ + .x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)), + .y = static_cast<s32>(is_2d && up_scaling ? extent.height + : resolution.ScaleUp(extent.height)), + }; + const VkOffset2D dst_size{ + .x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width), + .y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height) + : extent.height), + }; + boost::container::small_vector<VkImageBlit, 4> regions; + regions.reserve(resources.levels); + for (s32 level = 0; level < resources.levels; level++) { + regions.push_back({ + .srcSubresource{ + .aspectMask = aspect_mask, + .mipLevel = static_cast<u32>(level), + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(resources.layers), + }, + .srcOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = std::max(1, src_size.x >> level), + .y = std::max(1, src_size.y >> level), + .z = 1, + }, + }, + .dstSubresource{ + .aspectMask = aspect_mask, + .mipLevel = static_cast<u32>(level), + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(resources.layers), + }, + .dstOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = std::max(1, dst_size.x >> level), + .y = std::max(1, dst_size.y >> level), + .z = 1, + }, + }, + }); + } + const VkImageSubresourceRange subresource_range{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = subresource_range, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = subresource_range, + }, + }; + const std::array write_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = subresource_range, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = subresource_range, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, nullptr, nullptr, write_barriers); + }); +} } // Anonymous namespace +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, + MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_buffer_pool_, + BlitImageHelper& blit_image_helper_, + ASTCDecoderPass& astc_decoder_pass_, + RenderPassCache& render_pass_cache_) + : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, + staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, + astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_}, + resolution{Settings::values.resolution_info} {} + void TextureCacheRuntime::Finish() { scheduler.Finish(); } @@ -614,8 +771,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { - blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter, - operation); + blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D), + dst_region, src_region, filter, operation); return; } if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { @@ -719,26 +876,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }); } -void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, + bool rescaled) { + const u32 up_scale = rescaled ? resolution.up_scale : 1; + const u32 down_shift = rescaled ? resolution.down_shift : 0; switch (dst_view.format) { case PixelFormat::R16_UNORM: if (src_view.format == PixelFormat::D16_UNORM) { - return blit_image_helper.ConvertD16ToR16(dst, src_view); + return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32ToR32(dst, src_view); + return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); } break; case PixelFormat::D16_UNORM: if (src_view.format == PixelFormat::R16_UNORM) { - return blit_image_helper.ConvertR16ToD16(dst, src_view); + return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { - return blit_image_helper.ConvertR32ToD32(dst, src_view); + return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); } break; default: @@ -840,36 +1000,39 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } -Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, +void TextureCacheRuntime::TickFrame() {} + +Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) - : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), - commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, + runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), + commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; } else { flags |= VideoCommon::ImageFlagBits::Converted; } } - if (runtime.device.HasDebuggingToolAttached()) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + if (runtime->device.HasDebuggingToolAttached()) { + original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, .pNext = nullptr, .usage = VK_IMAGE_USAGE_STORAGE_BIT, }; - if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { - const auto& device = runtime.device.GetLogical(); + current_image = *original_image; + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { + const auto& device = runtime->device.GetLogical(); storage_image_views.reserve(info.resources.levels); for (s32 level = 0; level < info.resources.levels; ++level) { storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = &storage_image_view_usage_create_info, .flags = 0, - .image = *image, + .image = *original_image, .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, .components{ @@ -890,26 +1053,39 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } +Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} + Image::~Image() = default; void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { // TODO: Move this to another API + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(true); + } scheduler->RequestOutsideRenderPassOperationContext(); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); const VkBuffer src_buffer = map.buffer; - const VkImage vk_image = *image; + const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool is_initialized = std::exchange(initialized, true); scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies](vk::CommandBuffer cmdbuf) { CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); }); + if (is_rescaled) { + ScaleUp(); + } } void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); - scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, + scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -959,6 +1135,146 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier, nullptr, image_write_barrier); }); + if (is_rescaled) { + ScaleUp(true); + } +} + +bool Image::ScaleUp(bool ignore) { + if (True(flags & ImageFlagBits::Rescaled)) { + return false; + } + ASSERT(info.type != ImageType::Linear); + flags |= ImageFlagBits::Rescaled; + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } + has_scaled = true; + const auto& device = runtime->device; + if (!scaled_image) { + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + auto scaled_info = info; + scaled_info.size.width = scaled_width; + scaled_info.size.height = scaled_height; + scaled_image = MakeImage(device, scaled_info); + auto& allocator = runtime->memory_allocator; + scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); + ignore = false; + } + current_image = *scaled_image; + if (ignore) { + return true; + } + + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); + } else { + return BlitScaleHelper(true); + } + return true; +} + +bool Image::ScaleDown(bool ignore) { + if (False(flags & ImageFlagBits::Rescaled)) { + return false; + } + ASSERT(info.type != ImageType::Linear); + flags &= ~ImageFlagBits::Rescaled; + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } + current_image = *original_image; + if (ignore) { + return true; + } + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto& device = runtime->device; + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); + } else { + return BlitScaleHelper(false); + } + return true; +} + +bool Image::BlitScaleHelper(bool scale_up) { + using namespace VideoCommon; + static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; + const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear + : Tegra::Engines::Fermi2D::Filter::Point; + + const bool is_2d = info.type == ImageType::e2D; + const auto& resolution = runtime->resolution; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view; + std::unique_ptr<Framebuffer>& blit_framebuffer = + scale_up ? scale_framebuffer : normal_framebuffer; + if (!blit_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this); + } + + const u32 src_width = scale_up ? info.size.width : scaled_width; + const u32 src_height = scale_up ? info.size.height : scaled_height; + const u32 dst_width = scale_up ? scaled_width : info.size.width; + const u32 dst_height = scale_up ? scaled_height : info.size.height; + const Region2D src_region{ + .start = {0, 0}, + .end = {static_cast<s32>(src_width), static_cast<s32>(src_height)}, + }; + const Region2D dst_region{ + .start = {0, 0}, + .end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)}, + }; + const VkExtent2D extent{ + .width = std::max(scaled_width, info.size.width), + .height = std::max(scaled_height, info.size.width), + }; + + auto* view_ptr = blit_view.get(); + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + if (!blit_framebuffer) { + blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent); + } + const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); + + runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, + src_region, operation, BLIT_OPERATION); + } else if (!runtime->device.IsBlitDepthStencilSupported() && + aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!blit_framebuffer) { + blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent); + } + runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), + blit_view->StencilView(), dst_region, + src_region, operation, BLIT_OPERATION); + } else { + // TODO: Use helper blits where applicable + flags &= ~ImageFlagBits::Rescaled; + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format); + return false; + } + return true; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, @@ -1052,9 +1368,11 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} -ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params} {} +ImageView::~ImageView() = default; + VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; @@ -1137,7 +1455,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); } // Some games have samplers with garbage. Sanitize them here. - const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, @@ -1162,7 +1481,29 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, - ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) + : render_area{VkExtent2D{ + .width = key.size.width, + .height = key.size.height, + }} { + CreateFramebuffer(runtime, color_buffers, depth_buffer); + if (runtime.device.HasDebuggingToolAttached()) { + framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); + } +} + +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent) + : render_area{extent} { + std::array<ImageView*, NUM_RT> color_buffers{color_buffer}; + CreateFramebuffer(runtime, color_buffers, depth_buffer); +} + +Framebuffer::~Framebuffer() = default; + +void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, + std::span<ImageView*, NUM_RT> color_buffers, + ImageView* depth_buffer) { std::vector<VkImageView> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1200,10 +1541,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass = runtime.render_pass_cache.Get(renderpass_key); - render_area = VkExtent2D{ - .width = key.size.width, - .height = key.size.height, - }; num_color_buffers = static_cast<u32>(num_colors); framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, @@ -1212,13 +1549,10 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM .renderPass = renderpass, .attachmentCount = static_cast<u32>(attachments.size()), .pAttachments = attachments.data(), - .width = key.size.width, - .height = key.size.height, + .width = render_area.width, + .height = render_area.height, .layers = static_cast<u32>(std::max(num_layers, 1)), }); - if (runtime.device.HasDebuggingToolAttached()) { - framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); - } } void TextureCacheRuntime::AccelerateImageUpload( diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b09c468e4..f5f8f9a74 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -13,6 +13,10 @@ #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace Settings { +struct ResolutionScalingInfo; +} + namespace Vulkan { using VideoCommon::ImageId; @@ -31,14 +35,14 @@ class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct TextureCacheRuntime { - const Device& device; - VKScheduler& scheduler; - MemoryAllocator& memory_allocator; - StagingBufferPool& staging_buffer_pool; - BlitImageHelper& blit_image_helper; - ASTCDecoderPass& astc_decoder_pass; - RenderPassCache& render_pass_cache; +class TextureCacheRuntime { +public: + explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, + MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_buffer_pool_, + BlitImageHelper& blit_image_helper_, + ASTCDecoderPass& astc_decoder_pass_, + RenderPassCache& render_pass_cache_); void Finish(); @@ -46,6 +50,10 @@ struct TextureCacheRuntime { StagingBufferRef DownloadStagingBuffer(size_t size); + void TickFrame(); + + u64 GetDeviceLocalMemory() const; + void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, @@ -53,7 +61,7 @@ struct TextureCacheRuntime { void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); bool CanAccelerateImageUpload(Image&) const noexcept { return false; @@ -74,13 +82,21 @@ struct TextureCacheRuntime { return true; } - u64 GetDeviceLocalMemory() const; + const Device& device; + VKScheduler& scheduler; + MemoryAllocator& memory_allocator; + StagingBufferPool& staging_buffer_pool; + BlitImageHelper& blit_image_helper; + ASTCDecoderPass& astc_decoder_pass; + RenderPassCache& render_pass_cache; + const Settings::ResolutionScalingInfo& resolution; }; class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit Image(const VideoCommon::NullImageParams&); ~Image(); @@ -97,7 +113,7 @@ public: std::span<const VideoCommon::BufferImageCopy> copies); [[nodiscard]] VkImage Handle() const noexcept { - return *image; + return current_image; } [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { @@ -113,14 +129,30 @@ public: return std::exchange(initialized, true); } + bool ScaleUp(bool ignore = false); + + bool ScaleDown(bool ignore = false); + private: - VKScheduler* scheduler; - vk::Image image; + bool BlitScaleHelper(bool scale_up); + + VKScheduler* scheduler{}; + TextureCacheRuntime* runtime{}; + + vk::Image original_image; MemoryCommit commit; - vk::ImageView image_view; std::vector<vk::ImageView> storage_image_views; VkImageAspectFlags aspect_mask = 0; bool initialized = false; + vk::Image scaled_image{}; + MemoryCommit scaled_commit{}; + VkImage current_image{}; + + std::unique_ptr<Framebuffer> scale_framebuffer; + std::unique_ptr<ImageView> scale_view; + + std::unique_ptr<Framebuffer> normal_framebuffer; + std::unique_ptr<ImageView> normal_view; }; class ImageView : public VideoCommon::ImageViewBase { @@ -128,7 +160,15 @@ public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, const VideoCommon::ImageViewInfo&, GPUVAddr); - explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); + + ~ImageView(); + + ImageView(const ImageView&) = delete; + ImageView& operator=(const ImageView&) = delete; + + ImageView(ImageView&&) = default; + ImageView& operator=(ImageView&&) = default; [[nodiscard]] VkImageView DepthView(); @@ -197,9 +237,23 @@ private: class Framebuffer { public: - explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers, + explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent); + + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + + void CreateFramebuffer(TextureCacheRuntime& runtime, + std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer); + [[nodiscard]] VkFramebuffer Handle() const noexcept { return *framebuffer; } @@ -262,6 +316,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index eb1746265..a36015c8c 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -82,6 +82,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { return PixelFormat::D32_FLOAT; case Tegra::DepthFormat::D16_UNORM: return PixelFormat::D16_UNORM; + case Tegra::DepthFormat::S8_UINT: + return PixelFormat::S8_UINT; case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: return PixelFormat::D32_FLOAT_S8_UINT; default: @@ -214,6 +216,11 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { } if (static_cast<std::size_t>(pixel_format) < + static_cast<std::size_t>(PixelFormat::MaxStencilFormat)) { + return SurfaceType::Stencil; + } + + if (static_cast<std::size_t>(pixel_format) < static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { return SurfaceType::DepthStencil; } @@ -279,6 +286,80 @@ bool IsPixelFormatSRGB(PixelFormat format) { } } +bool IsPixelFormatInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::A2B10G10R10_UINT: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + return true; + default: + return false; + } +} + +bool IsPixelFormatSignedInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::R8_SINT: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R32_SINT: + return true; + default: + return false; + } +} + +size_t PixelComponentSizeBitsInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + return 8; + case PixelFormat::A2B10G10R10_UINT: + return 10; + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + return 16; + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + return 32; + default: + return 0; + } +} + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1503db81f..33e8d24ab 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -110,8 +110,12 @@ enum class PixelFormat { MaxDepthFormat, + // Stencil formats + S8_UINT = MaxDepthFormat, + MaxStencilFormat, + // DepthStencil formats - D24_UNORM_S8_UINT = MaxDepthFormat, + D24_UNORM_S8_UINT = MaxStencilFormat, S8_UINT_D24_UNORM, D32_FLOAT_S8_UINT, @@ -125,8 +129,9 @@ constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max enum class SurfaceType { ColorTexture = 0, Depth = 1, - DepthStencil = 2, - Invalid = 3, + Stencil = 2, + DepthStencil = 3, + Invalid = 4, }; enum class SurfaceTarget { @@ -229,6 +234,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM 1, // D32_FLOAT_S8_UINT @@ -328,6 +334,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM + 1, // S8_UINT 1, // D24_UNORM_S8_UINT 1, // S8_UINT_D24_UNORM 1, // D32_FLOAT_S8_UINT @@ -427,6 +434,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ 32, // E5B9G9R9_FLOAT 32, // D32_FLOAT 16, // D16_UNORM + 8, // S8_UINT 32, // D24_UNORM_S8_UINT 32, // S8_UINT_D24_UNORM 64, // D32_FLOAT_S8_UINT @@ -460,6 +468,12 @@ bool IsPixelFormatASTC(PixelFormat format); bool IsPixelFormatSRGB(PixelFormat format); +bool IsPixelFormatInteger(PixelFormat format); + +bool IsPixelFormatSignedInteger(PixelFormat format); + +size_t PixelComponentSizeBitsInteger(PixelFormat format); + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index c6cf0583f..b2c81057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "D32_FLOAT"; case PixelFormat::D16_UNORM: return "D16_UNORM"; + case PixelFormat::S8_UINT: + return "S8_UINT"; case PixelFormat::D24_UNORM_S8_UINT: return "D24_UNORM_S8_UINT"; case PixelFormat::S8_UINT_D24_UNORM: diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 6052d148a..3db2fdf34 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -60,15 +60,17 @@ namespace { ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, - converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, - cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, - mip_level_offsets{CalculateMipLevelOffsets(info)} { + converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, + has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); slice_subresources = CalculateSliceSubresources(info); } } +ImageBase::ImageBase(const NullImageParams&) {} + ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} @@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i } lhs.aliased_images.push_back(std::move(lhs_alias)); rhs.aliased_images.push_back(std::move(rhs_alias)); + lhs.flags &= ~ImageFlagBits::IsRescalable; + rhs.flags &= ~ImageFlagBits::IsRescalable; } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0c17a791b..89c111c00 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -33,6 +33,11 @@ enum class ImageFlagBits : u32 { ///< garbage collection priority Alias = 1 << 11, ///< This image has aliases and has priority on garbage ///< collection + + // Rescaler + Rescaled = 1 << 12, + CheckingRescalable = 1 << 13, + IsRescalable = 1 << 14, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -43,8 +48,11 @@ struct AliasedImage { ImageId id; }; +struct NullImageParams {}; + struct ImageBase { explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit ImageBase(const NullImageParams&); [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; @@ -68,11 +76,18 @@ struct ImageBase { void CheckBadOverlapState(); void CheckAliasState(); + bool HasScaled() const { + return has_scaled; + } + ImageInfo info; u32 guest_size_bytes = 0; u32 unswizzled_size_bytes = 0; u32 converted_size_bytes = 0; + u32 scale_rating = 0; + u64 scale_tick = 0; + bool has_scaled = false; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 64fd7010a..afb94082b 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,6 +16,7 @@ namespace VideoCommon { using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceType; ImageInfo::ImageInfo(const TICEntry& config) noexcept { format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, @@ -31,6 +32,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { .depth = config.block_depth, }; } + rescaleable = false; tile_width_spacing = config.tile_width_spacing; if (config.texture_type != TextureType::Texture2D && config.texture_type != TextureType::Texture2DNoMipmap) { @@ -41,6 +43,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT(config.BaseLayer() == 0); type = ImageType::e1D; size.width = config.Width(); + resources.layers = 1; break; case TextureType::Texture1DArray: UNIMPLEMENTED_IF(config.BaseLayer() != 0); @@ -52,12 +55,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { case TextureType::Texture2DNoMipmap: ASSERT(config.Depth() == 1); type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; + rescaleable = !config.IsPitchLinear(); size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + 1; break; case TextureType::Texture2DArray: type = ImageType::e2D; + rescaleable = true; size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + config.Depth(); @@ -82,10 +87,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { size.width = config.Width(); size.height = config.Height(); size.depth = config.Depth(); + resources.layers = 1; break; case TextureType::Texture1DBuffer: type = ImageType::Buffer; size.width = config.Width(); + resources.layers = 1; break; default: UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); @@ -95,12 +102,16 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); + rescaleable &= (block.depth == 0) && resources.levels == 1; + rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; + downscaleable = size.height > 512; } } ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { const auto& rt = regs.rt[index]; format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); + rescaleable = false; if (rt.tile_mode.is_pitch_linear) { ASSERT(rt.tile_mode.is_3d == 0); type = ImageType::Linear; @@ -126,6 +137,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; } @@ -135,6 +149,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); size.width = regs.zeta_width; size.height = regs.zeta_height; + rescaleable = false; resources.levels = 1; layer_stride = regs.zeta.layer_stride * 4; maybe_unaligned_layer_stride = layer_stride; @@ -153,6 +168,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { + rescaleable = block.depth == 0; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; } @@ -161,6 +178,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); + rescaleable = false; if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { type = ImageType::Linear; size = Extent3D{ @@ -171,6 +189,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { pitch = config.pitch; } else { type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; + block = Extent3D{ .width = config.block_width, .height = config.block_height, @@ -183,6 +202,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; + downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 5049fc36e..5932dcaba 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; struct ImageInfo { - explicit ImageInfo() = default; + ImageInfo() = default; explicit ImageInfo(const TICEntry& config) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; @@ -33,6 +33,8 @@ struct ImageInfo { u32 maybe_unaligned_layer_stride = 0; u32 num_samples = 1; u32 tile_width_spacing = 0; + bool rescaleable = false; + bool downscaleable = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 450becbeb..c7b4fc231 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -37,14 +37,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) - : format{info.format}, type{ImageViewType::Buffer}, size{ - .width = info.size.width, - .height = 1, - .depth = 1, - } { + : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, + size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); } -ImageViewBase::ImageViewBase(const NullImageParams&) {} +ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 903f715c5..9c24c5359 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat; struct ImageViewInfo; struct ImageInfo; -struct NullImageParams {}; +struct NullImageViewParams {}; enum class ImageViewFlagBits : u16 { PreemtiveDownload = 1 << 0, @@ -28,7 +28,7 @@ struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); - explicit ImageViewBase(const NullImageParams&); + explicit ImageViewBase(const NullImageViewParams&); [[nodiscard]] bool IsBuffer() const noexcept { return type == ImageViewType::Buffer; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f70c1f764..241f71a91 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include <unordered_set> #include "common/alignment.h" +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/texture_cache/image_view_base.h" @@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_images.insert(NullImageParams{})); + void(slot_image_views.insert(runtime, NullImageViewParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + const u64 possible_expected_memory = (device_memory * 4) / 10; + const u64 possible_critical_memory = (device_memory * 7) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); minimum_memory = 0; } else { - // on OGL we can be more conservatives as the driver takes care. + // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; + minimum_memory = 0; } } @@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { if (num_iterations == 0) { return true; @@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() { UntrackImage(image, image_id); } UnregisterImage(image_id); - DeleteImage(image_id); + DeleteImage(image_id, image.scale_tick > frame_tick + 5); return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() { sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); + runtime.TickFrame(); ++frame_tick; } @@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept { } template <class P> -void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +template <bool has_blacklists> +void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { + FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); } template <class P> -void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { + FillImageViews<true>(compute_image_table, compute_image_view_ids, views); } template <class P> @@ -190,6 +192,102 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { } template <class P> +bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { + auto& flags = maxwell3d.dirty.flags; + u32 scale_rating = 0; + bool rescaled = false; + std::array<ImageId, NUM_RT> tmp_color_images{}; + ImageId tmp_depth_image{}; + do { + flags[Dirty::RenderTargets] = false; + + has_deleted_images = false; + // Render target control is used on all render targets, so force look ups when this one is + // up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + scale_rating = 0; + bool any_rescaled = false; + bool can_rescale = true; + const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { + if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) { + const auto& view = slot_image_views[view_id]; + const auto image_id = view.image_id; + id_save = image_id; + auto& image = slot_images[image_id]; + can_rescale &= ImageCanRescale(image); + any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) || + GetFormatType(image.info.format) != SurfaceType::ColorTexture; + scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick + ? image.scale_rating + 1U + : image.scale_rating); + } else { + id_save = CORRUPT_ID; + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + check_rescale(color_buffer_id, tmp_color_images[index]); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + check_rescale(render_targets.depth_buffer_id, tmp_depth_image); + + if (can_rescale) { + rescaled = any_rescaled || scale_rating >= 2; + const auto scale_up = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleUp(image); + } + }; + if (rescaled) { + for (size_t index = 0; index < NUM_RT; ++index) { + scale_up(tmp_color_images[index]); + } + scale_up(tmp_depth_image); + scale_rating = 2; + } + } else { + rescaled = false; + const auto scale_down = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleDown(image); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_down(tmp_color_images[index]); + } + scale_down(tmp_depth_image); + scale_rating = 1; + } + } while (has_deleted_images); + const auto set_rating = [this, scale_rating](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + image.scale_rating = scale_rating; + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + set_rating(tmp_color_images[index]); + } + set_rating(tmp_depth_image); + + return rescaled; +} + +template <class P> void TextureCache<P>::UpdateRenderTargets(bool is_clear) { using namespace VideoCommon::Dirty; auto& flags = maxwell3d.dirty.flags; @@ -202,24 +300,18 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); return; } - flags[Dirty::RenderTargets] = false; - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; + const bool rescaled = RescaleRenderTargets(is_clear); + if (is_rescaling != rescaled) { + flags[Dirty::RescaleViewports] = true; + flags[Dirty::RescaleScissors] = true; + is_rescaling = rescaled; + } for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); @@ -227,9 +319,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { for (size_t index = 0; index < NUM_RT; ++index) { render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); } + u32 up_scale = 1; + u32 down_shift = 0; + if (is_rescaling) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, + (maxwell3d.regs.render_area.width * up_scale) >> down_shift, + (maxwell3d.regs.render_area.height * up_scale) >> down_shift, }; flags[Dirty::DepthBiasGlobal] = true; @@ -241,17 +339,28 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { } template <class P> +template <bool has_blacklists> void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, - std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); + std::span<ImageViewInOut> views) { + bool has_blacklisted; do { has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); + if constexpr (has_blacklists) { + has_blacklisted = false; + } + for (ImageViewInOut& view : views) { + view.id = VisitImageView(table, cached_image_view_ids, view.index); + if constexpr (has_blacklists) { + if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { + const ImageViewBase& image_view{slot_image_views[view.id]}; + auto& image = slot_images[image_view.image_id]; + has_blacklisted |= ScaleDown(image); + image.scale_rating = 0; + } + } + } + } while (has_deleted_images || (has_blacklists && has_blacklisted)); } template <class P> @@ -369,8 +478,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, PrepareImage(src_id, false, false); PrepareImage(dst_id, true, false); - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; + Image& dst_image = slot_images[dst_id]; + Image& src_image = slot_images[src_id]; + bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + + const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; + if (is_src_rescaled != is_dst_rescaled) { + if (ImageCanRescale(src_image)) { + ScaleUp(src_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + if (is_resolve) { + dst_image.info.rescaleable = true; + for (const auto& alias : dst_image.aliased_images) { + Image& other_image = slot_images[alias.id]; + other_image.info.rescaleable = true; + } + } + } + if (ImageCanRescale(dst_image)) { + ScaleUp(dst_image); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } + } + if (is_resolve && (is_src_rescaled != is_dst_rescaled)) { + // A resolve requires both images to be the same dimensions. Resize down if needed. + ScaleDown(src_image); + ScaleDown(dst_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } + const auto& resolution = Settings::values.resolution_info; + const auto scale_region = [&](Region2D& region) { + region.start.x = resolution.ScaleUp(region.start.x); + region.start.y = resolution.ScaleUp(region.start.y); + region.end.x = resolution.ScaleUp(region.end.x); + region.end.y = resolution.ScaleUp(region.end.y); + }; // TODO: Deduplicate const std::optional src_base = src_image.TryFindBase(src.Address()); @@ -378,20 +522,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ + Region2D src_region{ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, }; + if (is_src_rescaled) { + scale_region(src_region); + } const std::optional dst_base = dst_image.TryFindBase(dst.Address()); const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ + Region2D dst_region{ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, }; + if (is_dst_rescaled) { + scale_region(dst_region); + } // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -487,6 +637,20 @@ void TextureCache<P>::PopAsyncFlushes() { } template <class P> +bool TextureCache<P>::IsRescaling() const noexcept { + return is_rescaling; +} + +template <class P> +bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept { + if (image_view.type == ImageViewType::Buffer) { + return false; + } + const ImageBase& image = slot_images[image_view.image_id]; + return True(image.flags & ImageFlagBits::Rescaled); +} + +template <class P> bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { bool is_modified = false; ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { @@ -624,6 +788,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, } template <class P> +bool TextureCache<P>::ImageCanRescale(ImageBase& image) { + if (!image.info.rescaleable) { + return false; + } + if (Settings::values.resolution_info.downscale && !image.info.downscaleable) { + return false; + } + if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) { + return true; + } + if (True(image.flags & ImageFlagBits::IsRescalable)) { + return true; + } + image.flags |= ImageFlagBits::CheckingRescalable; + for (const auto& alias : image.aliased_images) { + Image& other_image = slot_images[alias.id]; + if (!ImageCanRescale(other_image)) { + image.flags &= ~ImageFlagBits::CheckingRescalable; + return false; + } + } + image.flags &= ~ImageFlagBits::CheckingRescalable; + image.flags |= ImageFlagBits::IsRescalable; + return true; +} + +template <class P> +void TextureCache<P>::InvalidateScale(Image& image) { + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } + const std::span<const ImageViewId> image_view_ids = image.image_view_ids; + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + image.image_view_ids.clear(); + image.image_view_infos.clear(); + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} + +template <class P> +u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { + const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * + Settings::values.resolution_info.up_scale); + const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + + Settings::values.resolution_info.down_shift); + const u64 image_size_bytes = + static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes)); + const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift; + const u64 fitted_size = Common::AlignUp(tentative_size, 1024); + return fitted_size; +} + +template <class P> +bool TextureCache<P>::ScaleUp(Image& image) { + const bool has_copy = image.HasScaled(); + const bool rescaled = image.ScaleUp(); + if (!rescaled) { + return false; + } + if (!has_copy) { + total_used_memory += GetScaledImageSizeBytes(image); + } + InvalidateScale(image); + return true; +} + +template <class P> +bool TextureCache<P>::ScaleDown(Image& image) { + const bool rescaled = image.ScaleDown(); + if (!rescaled) { + return false; + } + InvalidateScale(image); + return true; +} + +template <class P> ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); @@ -660,12 +923,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector<ImageId> right_aliased_ids; std::unordered_set<ImageId> ignore_textures; std::vector<ImageId> bad_overlap_ids; + std::vector<ImageId> all_siblings; + const bool this_is_linear = info.type == ImageType::Linear; const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { if (True(overlap.flags & ImageFlagBits::Remapped)) { ignore_textures.insert(overlap_id); return; } - if (info.type == ImageType::Linear) { + const bool overlap_is_linear = overlap.info.type == ImageType::Linear; + if (this_is_linear != overlap_is_linear) { + return; + } + if (this_is_linear && overlap_is_linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch left_aliased_ids.push_back(overlap_id); @@ -681,6 +950,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA cpu_addr = solution->cpu_addr; new_info.resources = solution->resources; overlap_ids.push_back(overlap_id); + all_siblings.push_back(overlap_id); return; } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; @@ -688,10 +958,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else { bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; @@ -709,6 +981,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } }; ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + + bool can_rescale = info.rescaleable; + bool any_rescaled = false; + for (const ImageId sibling_id : all_siblings) { + if (!can_rescale) { + break; + } + Image& sibling = slot_images[sibling_id]; + can_rescale &= ImageCanRescale(sibling); + any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); + } + + can_rescale &= any_rescaled; + + if (can_rescale) { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + ScaleUp(sibling); + } + } else { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + ScaleDown(sibling); + } + } + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; @@ -731,14 +1029,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA // TODO: Only upload what we need RefreshContents(new_image, new_image_id); + if (can_rescale) { + ScaleUp(new_image); + } else { + ScaleDown(new_image); + } + for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { + const auto& resolution = Settings::values.resolution_info; const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); + runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); @@ -1083,13 +1390,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( @@ -1213,8 +1513,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { } template <class P> -void TextureCache<P>::DeleteImage(ImageId image_id) { +void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { ImageBase& image = slot_images[image_id]; + if (image.HasScaled()) { + total_used_memory -= GetScaledImageSizeBytes(image); + } + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { @@ -1269,10 +1579,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { num_removed_overlaps); } for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + if (!immediate_delete) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + } slot_image_views.erase(image_view_id); } - sentenced_images.Push(std::move(slot_images[image_id])); + if (!immediate_delete) { + sentenced_images.Push(std::move(slot_images[image_id])); + } slot_images.erase(image_id); alloc_images.erase(alloc_image_it); @@ -1306,6 +1620,9 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi auto it = framebuffers.begin(); while (it != framebuffers.end()) { if (it->first.Contains(removed_views)) { + auto framebuffer_id = it->second; + ASSERT(framebuffer_id); + sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id])); it = framebuffers.erase(it); } else { ++it; @@ -1322,26 +1639,60 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { template <class P> void TextureCache<P>::SynchronizeAliases(ImageId image_id) { boost::container::small_vector<const AliasedImage*, 1> aliased_images; - ImageBase& image = slot_images[image_id]; + Image& image = slot_images[image_id]; + bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; if (image.modification_tick < aliased_image.modification_tick) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); + any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); } } if (aliased_images.empty()) { return; } + const bool can_rescale = ImageCanRescale(image); + if (any_rescaled) { + if (can_rescale) { + ScaleUp(image); + } else { + ScaleDown(image); + } + } image.modification_tick = most_recent_tick; std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { const ImageBase& lhs_image = slot_images[lhs->id]; const ImageBase& rhs_image = slot_images[rhs->id]; return lhs_image.modification_tick < rhs_image.modification_tick; }); + const auto& resolution = Settings::values.resolution_info; for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); + if (!resolution.active | !any_rescaled) { + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + Image& aliased_image = slot_images[aliased->id]; + if (!can_rescale) { + ScaleDown(aliased_image); + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + ScaleUp(aliased_image); + + const bool both_2d{image.info.type == ImageType::e2D && + aliased_image.info.type == ImageType::e2D}; + auto copies = aliased->copies; + for (auto copy : copies) { + copy.extent.width = std::max<u32>( + (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1); + if (both_2d) { + copy.extent.height = std::max<u32>( + (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1); + } + } + CopyImage(image_id, aliased->id, copies); } } @@ -1377,9 +1728,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi } template <class P> -void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { +void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) { Image& dst = slot_images[dst_id]; Image& src = slot_images[src_id]; + const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); + const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D}; + const auto& resolution = Settings::values.resolution_info; + for (auto& copy : copies) { + copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x); + copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x); + copy.extent.width = resolution.ScaleUp(copy.extent.width); + if (both_2d) { + copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y); + copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y); + copy.extent.height = resolution.ScaleUp(copy.extent.height); + } + } + } const auto dst_format_type = GetFormatType(dst.info.format); const auto src_format_type = GetFormatType(src.info.format); if (src_format_type == dst_format_type) { @@ -1392,6 +1759,9 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const } UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { + return runtime.ConvertImage(dst, src, copies); + } for (const ImageCopy& copy : copies) { UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); @@ -1424,7 +1794,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const }; UNIMPLEMENTED_IF(copy.extent != expected_size); - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled); } } @@ -1433,8 +1803,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (*old_id == new_id) { return; } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; + if (new_id) { + const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { uncommitted_downloads.push_back(old_view.image_id); } @@ -1447,10 +1817,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( ImageId image_id, const ImageViewInfo& view_info) { const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); const ImageBase& image = slot_images[image_id]; + const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled); const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + if (is_rescaled) { + const auto& resolution = Settings::values.resolution_info; + extent.width = resolution.ScaleUp(extent.width); + if (image.info.type == ImageType::e2D) { + extent.height = resolution.ScaleUp(extent.height); + } + } const u32 num_samples = image.info.num_samples; const auto [samples_x, samples_y] = SamplesLog2(num_samples); const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2d1893c1c..a9504c0e8 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -21,6 +21,7 @@ #include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" #include "video_core/texture_cache/slot_vector.h" @@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; +struct ImageViewInOut { + u32 index{}; + bool blacklist{}; + ImageViewId id{}; +}; + template <class P> class TextureCache { /// Address shift for caching images into a hash table @@ -52,11 +59,8 @@ class TextureCache { static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; + /// True when the API provides utilities for pixel format conversions. + static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS; static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; @@ -99,11 +103,11 @@ public: void MarkModification(ImageId id) noexcept; /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); + template <bool has_blacklists> + void FillGraphicsImageViews(std::span<ImageViewInOut> views); /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); + void FillComputeImageViews(std::span<ImageViewInOut> views); /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index); @@ -117,6 +121,11 @@ public: /// Refresh the state for compute image view and sampler descriptors void SynchronizeComputeDescriptors(); + /// Updates the Render Targets if they can be rescaled + /// @param is_clear True when the render targets are being used for clears + /// @retval True if the Render Targets have been rescaled. + bool RescaleRenderTargets(bool is_clear); + /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears void UpdateRenderTargets(bool is_clear); @@ -160,6 +169,10 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + [[nodiscard]] bool IsRescaling() const noexcept; + + [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; + std::mutex mutex; private: @@ -198,9 +211,10 @@ private: void RunGarbageCollector(); /// Fills image_view_ids in the image views in indices + template <bool has_blacklists> void FillImageViews(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); + std::span<ImageViewId> cached_image_view_ids, + std::span<ImageViewInOut> views); /// Find or create an image view in the guest descriptor table ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, @@ -285,7 +299,7 @@ private: void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache - void DeleteImage(ImageId image); + void DeleteImage(ImageId image, bool immediate_delete = false); /// Remove image views references from the cache void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); @@ -306,7 +320,7 @@ private: void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); + void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies); /// Bind an image view as render target, downloading resources preemtively if needed void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); @@ -318,6 +332,12 @@ private: /// Returns true if the current clear parameters clear the whole image of a given image view [[nodiscard]] bool IsFullClear(ImageViewId id); + bool ImageCanRescale(ImageBase& image); + void InvalidateScale(Image& image); + bool ScaleUp(Image& image); + bool ScaleDown(Image& image); + u64 GetScaledImageSizeBytes(ImageBase& image); + Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; @@ -349,6 +369,7 @@ private: VAddr virtual_invalid_space{}; bool has_deleted_images = false; + bool is_rescaling = false; u64 total_used_memory = 0; u64 minimum_memory; u64 expected_memory; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 47a11cb2f..5c274abdf 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -22,6 +22,13 @@ using ImageAllocId = SlotId; using SamplerId = SlotId; using FramebufferId = SlotId; +/// Fake image ID for null image views +constexpr ImageId NULL_IMAGE_ID{0}; +/// Image view ID for null descriptors +constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; +/// Sampler ID for bugged sampler ids +constexpr SamplerId NULL_SAMPLER_ID{0}; + enum class ImageType : u32 { e1D, e2D, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 59cf2f561..ddc9fb13a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base) { + SubresourceBase base, u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); ASSERT(dst.num_samples == src.num_samples); @@ -732,7 +732,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.type == ImageType::e3D); ASSERT(src.resources.levels == 1); } - + const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; std::vector<ImageCopy> copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { @@ -762,6 +762,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn if (is_dst_3d) { copy.extent.depth = src.size.depth; } + copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1); + if (both_2d) { + copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1); + } } return copies; } @@ -1153,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; } - if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { dst_info.format = src->info.format; } - if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { src_info.format = dst->info.format; } } diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 766502908..7af52de2e 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -55,7 +55,8 @@ struct OverlapResult { [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base); + SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index a552543ed..06954963d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -51,22 +51,6 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, }; -unsigned SettingsMinimumAnisotropy() noexcept { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } -} - } // Anonymous namespace std::array<float, 4> TSCEntry::BorderColor() const noexcept { @@ -78,7 +62,18 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); + if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { + return 1.0f; + } + const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); + u32 added_anisotropic{}; + if (anisotropic_settings == 0) { + added_anisotropic = Settings::values.resolution_info.up_scale >> + Settings::values.resolution_info.down_shift; + } else { + added_anisotropic = Settings::values.max_anisotropy.GetValue() - 1U; + } + return static_cast<float>(1U << (max_anisotropy + added_anisotropic)); } } // namespace Tegra::Texture diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cae543a51..e852c817e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,6 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + Settings::UpdateRescalingInfo(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); @@ -53,11 +55,10 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor } } -u16 GetResolutionScaleFactor(const RendererBase& renderer) { - return static_cast<u16>( - Settings::values.resolution_factor.GetValue() != 0 - ? Settings::values.resolution_factor.GetValue() - : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); +float GetResolutionScaleFactor(const RendererBase& renderer) { + return Settings::values.resolution_info.active + ? Settings::values.resolution_info.up_factor + : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); } } // namespace VideoCore diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f5c27125d..f86877e86 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -25,6 +25,6 @@ class RendererBase; /// Creates an emulated GPU instance using the given system context. std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); -u16 GetResolutionScaleFactor(const RendererBase& renderer); +float GetResolutionScaleFactor(const RendererBase& renderer); } // namespace VideoCore diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0f807990c..70c52aaac 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -21,6 +21,13 @@ namespace Vulkan { namespace { namespace Alternatives { +constexpr std::array STENCIL8_UINT{ + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_UNDEFINED, +}; + constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, @@ -74,6 +81,8 @@ void SetNext(void**& next, T& data) { constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { switch (format) { + case VK_FORMAT_S8_UINT: + return Alternatives::STENCIL8_UINT.data(); case VK_FORMAT_D24_UNORM_S8_UINT: return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); case VK_FORMAT_D16_UNORM_S8_UINT: @@ -103,6 +112,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_A8B8G8R8_SNORM_PACK32, VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_R5G6B5_UNORM_PACK16, VK_FORMAT_B5G6R5_UNORM_PACK16, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2B10G10R10_UINT_PACK32, @@ -144,6 +154,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_R4G4B4A4_UNORM_PACK16, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D16_UNORM, + VK_FORMAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 2d5daf6cd..10653ac6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -40,6 +40,10 @@ public: VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; + /// Returns true if a format is supported. + bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; + /// Reports a device loss. void ReportLoss() const; @@ -370,10 +374,6 @@ private: /// Returns true if the device natively supports blitting depth stencil images. bool TestDepthStencilBlits() const; - /// Returns true if a format is supported. - bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. diff --git a/src/yuzu/applets/qt_software_keyboard.cpp b/src/yuzu/applets/qt_software_keyboard.cpp index 8fc0c5a36..a83a11a95 100644 --- a/src/yuzu/applets/qt_software_keyboard.cpp +++ b/src/yuzu/applets/qt_software_keyboard.cpp @@ -413,7 +413,7 @@ void QtSoftwareKeyboardDialog::ShowTextCheckDialog( ? ui->text_edit_osk->toPlainText().toStdU16String() : ui->line_edit_osk->text().toStdU16String(); - emit SubmitNormalText(SwkbdResult::Ok, std::move(text)); + emit SubmitNormalText(SwkbdResult::Ok, std::move(text), true); break; } } @@ -1510,7 +1510,8 @@ QtSoftwareKeyboard::~QtSoftwareKeyboard() = default; void QtSoftwareKeyboard::InitializeKeyboard( bool is_inline, Core::Frontend::KeyboardInitializeParameters initialize_parameters, - std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> submit_normal_callback_, + std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> + submit_normal_callback_, std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback_) { if (is_inline) { @@ -1609,8 +1610,8 @@ void QtSoftwareKeyboard::ExitKeyboard() const { } void QtSoftwareKeyboard::SubmitNormalText(Service::AM::Applets::SwkbdResult result, - std::u16string submitted_text) const { - submit_normal_callback(result, submitted_text); + std::u16string submitted_text, bool confirmed) const { + submit_normal_callback(result, submitted_text, confirmed); } void QtSoftwareKeyboard::SubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type, diff --git a/src/yuzu/applets/qt_software_keyboard.h b/src/yuzu/applets/qt_software_keyboard.h index 1a03c098c..592d9c085 100644 --- a/src/yuzu/applets/qt_software_keyboard.h +++ b/src/yuzu/applets/qt_software_keyboard.h @@ -51,8 +51,8 @@ public: void ExitKeyboard(); signals: - void SubmitNormalText(Service::AM::Applets::SwkbdResult result, - std::u16string submitted_text) const; + void SubmitNormalText(Service::AM::Applets::SwkbdResult result, std::u16string submitted_text, + bool confirmed = false) const; void SubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type, std::u16string submitted_text, s32 cursor_position) const; @@ -234,7 +234,7 @@ public: void InitializeKeyboard( bool is_inline, Core::Frontend::KeyboardInitializeParameters initialize_parameters, - std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> + std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> submit_normal_callback_, std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback_) override; @@ -272,13 +272,13 @@ signals: void MainWindowExitKeyboard() const; private: - void SubmitNormalText(Service::AM::Applets::SwkbdResult result, - std::u16string submitted_text) const; + void SubmitNormalText(Service::AM::Applets::SwkbdResult result, std::u16string submitted_text, + bool confirmed) const; void SubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type, std::u16string submitted_text, s32 cursor_position) const; - mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> + mutable std::function<void(Service::AM::Applets::SwkbdResult, std::u16string, bool)> submit_normal_callback; mutable std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)> submit_inline_callback; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 40fd47406..976acd176 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -32,7 +32,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "core/hle/kernel/k_process.h" #include "input_common/keyboard.h" #include "input_common/main.h" #include "input_common/mouse/mouse_input.h" @@ -66,7 +65,7 @@ void EmuThread::run() { if (Settings::values.use_disk_shader_cache.GetValue()) { system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), stop_token, + system.GetCurrentProcessProgramID(), stop_token, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { emit LoadProgress(stage, value, total); }); @@ -629,11 +628,9 @@ void GRenderWindow::ReleaseRenderTarget() { main_context.reset(); } -void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { - VideoCore::RendererBase& renderer = system.Renderer(); - if (res_scale == 0) { - res_scale = VideoCore::GetResolutionScaleFactor(renderer); - } +void GRenderWindow::CaptureScreenshot(const QString& screenshot_path) { + auto& renderer = system.Renderer(); + const f32 res_scale = VideoCore::GetResolutionScaleFactor(renderer); const Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale(res_scale)}; screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index e6a0666e9..40fd4a9d6 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -178,7 +178,7 @@ public: bool IsLoadingComplete() const; - void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); + void CaptureScreenshot(const QString& screenshot_path); std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index faea5dda1..8227d06bc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,9 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); + ReadGlobalSetting(Settings::values.resolution_setup); + ReadGlobalSetting(Settings::values.scaling_filter); + ReadGlobalSetting(Settings::values.anti_aliasing); ReadGlobalSetting(Settings::values.max_anisotropy); ReadGlobalSetting(Settings::values.use_speed_limit); ReadGlobalSetting(Settings::values.speed_limit); @@ -1364,6 +1367,18 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.fullscreen_mode.GetDefault()), Settings::values.fullscreen_mode.UsingGlobal()); WriteGlobalSetting(Settings::values.aspect_ratio); + WriteSetting(QString::fromStdString(Settings::values.resolution_setup.GetLabel()), + static_cast<u32>(Settings::values.resolution_setup.GetValue(global)), + static_cast<u32>(Settings::values.resolution_setup.GetDefault()), + Settings::values.resolution_setup.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.scaling_filter.GetLabel()), + static_cast<u32>(Settings::values.scaling_filter.GetValue(global)), + static_cast<u32>(Settings::values.scaling_filter.GetDefault()), + Settings::values.scaling_filter.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.anti_aliasing.GetLabel()), + static_cast<u32>(Settings::values.anti_aliasing.GetValue(global)), + static_cast<u32>(Settings::values.anti_aliasing.GetDefault()), + Settings::values.anti_aliasing.UsingGlobal()); WriteGlobalSetting(Settings::values.max_anisotropy); WriteGlobalSetting(Settings::values.use_speed_limit); WriteGlobalSetting(Settings::values.speed_limit); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index a7f4a6720..d673c1cdc 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -189,5 +189,8 @@ Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); Q_DECLARE_METATYPE(Settings::NvdecEmulation); +Q_DECLARE_METATYPE(Settings::ResolutionSetup); +Q_DECLARE_METATYPE(Settings::ScalingFilter); +Q_DECLARE_METATYPE(Settings::AntiAliasing); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 8e20cc6f3..59f975a6e 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() { ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->nvdec_emulation_widget->setEnabled(runtime_lock); + ui->resolution_combobox->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( @@ -102,6 +103,12 @@ void ConfigureGraphics::SetConfiguration() { ui->nvdec_emulation->setCurrentIndex( static_cast<int>(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); + ui->resolution_combobox->setCurrentIndex( + static_cast<int>(Settings::values.resolution_setup.GetValue())); + ui->scaling_filter_combobox->setCurrentIndex( + static_cast<int>(Settings::values.scaling_filter.GetValue())); + ui->anti_aliasing_combobox->setCurrentIndex( + static_cast<int>(Settings::values.anti_aliasing.GetValue())); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, @@ -122,6 +129,21 @@ void ConfigureGraphics::SetConfiguration() { ConfigurationShared::SetHighlight(ui->ar_label, !Settings::values.aspect_ratio.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->resolution_combobox, + &Settings::values.resolution_setup); + ConfigurationShared::SetHighlight(ui->resolution_label, + !Settings::values.resolution_setup.UsingGlobal()); + + ConfigurationShared::SetPerGameSetting(ui->scaling_filter_combobox, + &Settings::values.scaling_filter); + ConfigurationShared::SetHighlight(ui->scaling_filter_label, + !Settings::values.scaling_filter.UsingGlobal()); + + ConfigurationShared::SetPerGameSetting(ui->anti_aliasing_combobox, + &Settings::values.anti_aliasing); + ConfigurationShared::SetHighlight(ui->anti_aliasing_label, + !Settings::values.anti_aliasing.UsingGlobal()); + ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); @@ -133,11 +155,22 @@ void ConfigureGraphics::SetConfiguration() { } void ConfigureGraphics::ApplyConfiguration() { + const auto resolution_setup = static_cast<Settings::ResolutionSetup>( + ui->resolution_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + + const auto scaling_filter = static_cast<Settings::ScalingFilter>( + ui->scaling_filter_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + + const auto anti_aliasing = static_cast<Settings::AntiAliasing>( + ui->anti_aliasing_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, ui->fullscreen_mode_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, ui->aspect_ratio_combobox); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache, ui->use_disk_shader_cache, use_disk_shader_cache); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, @@ -165,7 +198,34 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green())); Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue())); } + if (Settings::values.resolution_setup.UsingGlobal()) { + Settings::values.resolution_setup.SetValue(resolution_setup); + } + if (Settings::values.scaling_filter.UsingGlobal()) { + Settings::values.scaling_filter.SetValue(scaling_filter); + } + if (Settings::values.anti_aliasing.UsingGlobal()) { + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } } else { + if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.resolution_setup.SetGlobal(true); + } else { + Settings::values.resolution_setup.SetGlobal(false); + Settings::values.resolution_setup.SetValue(resolution_setup); + } + if (ui->scaling_filter_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.scaling_filter.SetGlobal(true); + } else { + Settings::values.scaling_filter.SetGlobal(false); + Settings::values.scaling_filter.SetValue(scaling_filter); + } + if (ui->anti_aliasing_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.anti_aliasing.SetGlobal(true); + } else { + Settings::values.anti_aliasing.SetGlobal(false); + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); Settings::values.shader_backend.SetGlobal(true); @@ -312,6 +372,9 @@ void ConfigureGraphics::SetupPerGameUI() { ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal()); ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal()); ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); + ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal()); + ui->scaling_filter_combobox->setEnabled(Settings::values.scaling_filter.UsingGlobal()); + ui->anti_aliasing_combobox->setEnabled(Settings::values.anti_aliasing.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); @@ -340,6 +403,15 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredComboBox( ui->fullscreen_mode_combobox, ui->fullscreen_mode_label, static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->resolution_combobox, ui->resolution_label, + static_cast<int>(Settings::values.resolution_setup.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->scaling_filter_combobox, ui->scaling_filter_label, + static_cast<int>(Settings::values.scaling_filter.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->anti_aliasing_combobox, ui->anti_aliasing_label, + static_cast<int>(Settings::values.anti_aliasing.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); ConfigurationShared::InsertGlobalItem( diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index beae74344..660b68c1c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -310,6 +310,173 @@ </widget> </item> <item> + <widget class="QWidget" name="resolution_layout" native="true"> + <layout class="QHBoxLayout" name="horizontalLayout_5"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="resolution_label"> + <property name="text"> + <string>Resolution:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="resolution_combobox"> + <item> + <property name="text"> + <string>0.5X (360p/540p) [EXPERIMENTAL]</string> + </property> + </item> + <item> + <property name="text"> + <string>0.75X (540p/810p) [EXPERIMENTAL]</string> + </property> + </item> + <item> + <property name="text"> + <string>1X (720p/1080p)</string> + </property> + </item> + <item> + <property name="text"> + <string>2X (1440p/2160p)</string> + </property> + </item> + <item> + <property name="text"> + <string>3X (2160p/3240p)</string> + </property> + </item> + <item> + <property name="text"> + <string>4X (2880p/4320p)</string> + </property> + </item> + <item> + <property name="text"> + <string>5X (3600p/5400p)</string> + </property> + </item> + <item> + <property name="text"> + <string>6X (4320p/6480p)</string> + </property> + </item> + </widget> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QWidget" name="scaling_filter_layout" native="true"> + <layout class="QHBoxLayout" name="horizontalLayout_6"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="scaling_filter_label"> + <property name="text"> + <string>Window Adapting Filter:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="scaling_filter_combobox"> + <item> + <property name="text"> + <string>Nearest Neighbor</string> + </property> + </item> + <item> + <property name="text"> + <string>Bilinear</string> + </property> + </item> + <item> + <property name="text"> + <string>Bicubic</string> + </property> + </item> + <item> + <property name="text"> + <string>Gaussian</string> + </property> + </item> + <item> + <property name="text"> + <string>ScaleForce</string> + </property> + </item> + <item> + <property name="text"> + <string>AMD's FidelityFX™️ Super Resolution [Vulkan Only]</string> + </property> + </item> + </widget> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QWidget" name="anti_aliasing_layout" native="true"> + <layout class="QHBoxLayout" name="horizontalLayout_7"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="anti_aliasing_label"> + <property name="text"> + <string>Anti-Aliasing Method:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="anti_aliasing_combobox"> + <item> + <property name="text"> + <string>None</string> + </property> + </item> + <item> + <property name="text"> + <string>FXAA</string> + </property> + </item> + </widget> + </item> + </layout> + </widget> + </item> + <item> <widget class="QWidget" name="bg_layout" native="true"> <property name="sizePolicy"> <sizepolicy hsizetype="Preferred" vsizetype="Preferred"> diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index d06b45f17..96de0b3d1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -125,27 +125,32 @@ <widget class="QComboBox" name="anisotropic_filtering_combobox"> <item> <property name="text"> + <string>Automatic</string> + </property> + </item> + <item> + <property name="text"> <string>Default</string> </property> </item> <item> <property name="text"> - <string>2x (WILL BREAK THINGS)</string> + <string>2x</string> </property> </item> <item> <property name="text"> - <string>4x (WILL BREAK THINGS)</string> + <string>4x</string> </property> </item> <item> <property name="text"> - <string>8x (WILL BREAK THINGS)</string> + <string>8x</string> </property> </item> <item> <property name="text"> - <string>16x (WILL BREAK THINGS)</string> + <string>16x</string> </property> </item> </widget> diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 33110685a..a8b254199 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -163,7 +163,7 @@ void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { } void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { - const auto wheel_position = ev->position().toPoint(); + const auto wheel_position = ev->pos(); MicroProfileMousePosition(wheel_position.x() / x_scale, wheel_position.y() / y_scale, ev->angleDelta().y() / 120); ev->accept(); diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 6bd0f9ee9..2af95dbe5 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -159,7 +159,7 @@ GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} { * @return true if the haystack contains all words of userinput */ static bool ContainsAllWords(const QString& haystack, const QString& userinput) { - const QStringList userinput_split = userinput.split(QLatin1Char{' '}, Qt::SkipEmptyParts); + const QStringList userinput_split = userinput.split(QLatin1Char{' '}, QString::SkipEmptyParts); return std::all_of(userinput_split.begin(), userinput_split.end(), [&haystack](const QString& s) { return haystack.contains(s); }); diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp index d4e97fa16..e7e58f314 100644 --- a/src/yuzu/hotkeys.cpp +++ b/src/yuzu/hotkeys.cpp @@ -46,6 +46,8 @@ QShortcut* HotkeyRegistry::GetHotkey(const QString& group, const QString& action if (!hk.shortcut) hk.shortcut = new QShortcut(hk.keyseq, widget, nullptr, nullptr, hk.context); + hk.shortcut->setAutoRepeat(false); + return hk.shortcut; } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index e871fee36..6f83475b9 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -299,6 +299,8 @@ GMainWindow::GMainWindow() SDL_EnableScreenSaver(); #endif + Common::Log::Start(); + QStringList args = QApplication::arguments(); if (args.size() < 2) { @@ -483,8 +485,9 @@ void GMainWindow::SoftwareKeyboardInitialize( } else { connect( software_keyboard, &QtSoftwareKeyboardDialog::SubmitNormalText, this, - [this](Service::AM::Applets::SwkbdResult result, std::u16string submitted_text) { - emit SoftwareKeyboardSubmitNormalText(result, submitted_text); + [this](Service::AM::Applets::SwkbdResult result, std::u16string submitted_text, + bool confirmed) { + emit SoftwareKeyboardSubmitNormalText(result, submitted_text, confirmed); }, Qt::QueuedConnection); } @@ -744,6 +747,8 @@ void GMainWindow::InitializeWidgets() { shader_building_label = new QLabel(); shader_building_label->setToolTip(tr("The amount of shaders currently being built")); + res_scale_label = new QLabel(); + res_scale_label->setToolTip(tr("The current selected resolution scaling multiplier.")); emu_speed_label = new QLabel(); emu_speed_label->setToolTip( tr("Current emulation speed. Values higher or lower than 100% " @@ -756,8 +761,8 @@ void GMainWindow::InitializeWidgets() { tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " "full-speed emulation this should be at most 16.67 ms.")); - for (auto& label : - {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { + for (auto& label : {shader_building_label, res_scale_label, emu_speed_label, game_fps_label, + emu_frametime_label}) { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); @@ -769,6 +774,55 @@ void GMainWindow::InitializeWidgets() { tas_label->setFocusPolicy(Qt::NoFocus); statusBar()->insertPermanentWidget(0, tas_label); + // setup AA button + aa_status_button = new QPushButton(); + aa_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + aa_status_button->setFocusPolicy(Qt::NoFocus); + connect(aa_status_button, &QPushButton::clicked, [&] { + auto aa_mode = Settings::values.anti_aliasing.GetValue(); + if (aa_mode == Settings::AntiAliasing::LastAA) { + aa_mode = Settings::AntiAliasing::None; + } else { + aa_mode = static_cast<Settings::AntiAliasing>(static_cast<u32>(aa_mode) + 1); + } + Settings::values.anti_aliasing.SetValue(aa_mode); + aa_status_button->setChecked(true); + UpdateAAText(); + }); + UpdateAAText(); + aa_status_button->setCheckable(true); + aa_status_button->setChecked(true); + statusBar()->insertPermanentWidget(0, aa_status_button); + + // Setup Filter button + filter_status_button = new QPushButton(); + filter_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + filter_status_button->setFocusPolicy(Qt::NoFocus); + connect(filter_status_button, &QPushButton::clicked, [&] { + auto filter = Settings::values.scaling_filter.GetValue(); + if (filter == Settings::ScalingFilter::LastFilter) { + filter = Settings::ScalingFilter::NearestNeighbor; + } else { + filter = static_cast<Settings::ScalingFilter>(static_cast<u32>(filter) + 1); + } + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && + filter == Settings::ScalingFilter::Fsr) { + filter = Settings::ScalingFilter::NearestNeighbor; + } + Settings::values.scaling_filter.SetValue(filter); + filter_status_button->setChecked(true); + UpdateFilterText(); + }); + auto filter = Settings::values.scaling_filter.GetValue(); + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && + filter == Settings::ScalingFilter::Fsr) { + Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); + } + UpdateFilterText(); + filter_status_button->setCheckable(true); + filter_status_button->setChecked(true); + statusBar()->insertPermanentWidget(0, filter_status_button); + // Setup Dock button dock_status_button = new QPushButton(); dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); @@ -839,6 +893,11 @@ void GMainWindow::InitializeWidgets() { Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan); } else { Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL); + const auto filter = Settings::values.scaling_filter.GetValue(); + if (filter == Settings::ScalingFilter::Fsr) { + Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); + UpdateFilterText(); + } } system->ApplySettings(); @@ -972,7 +1031,7 @@ void GMainWindow::InitializeHotkeys() { &QShortcut::activatedAmbiguously, ui->action_Fullscreen, &QAction::trigger); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Exit Fullscreen"), this), &QShortcut::activated, this, [&] { - if (emulation_running) { + if (emulation_running && ui->action_Fullscreen->isChecked()) { ui->action_Fullscreen->setChecked(false); ToggleFullscreen(); } @@ -1532,6 +1591,7 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); shader_building_label->setVisible(false); + res_scale_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); @@ -2762,7 +2822,7 @@ void GMainWindow::OnConfigureTas() { } void GMainWindow::OnConfigurePerGame() { - const u64 title_id = system->CurrentProcess()->GetTitleID(); + const u64 title_id = system->GetCurrentProcessProgramID(); OpenPerGameConfiguration(title_id, game_path.toStdString()); } @@ -2861,7 +2921,7 @@ void GMainWindow::OnToggleFilterBar() { } void GMainWindow::OnCaptureScreenshot() { - const u64 title_id = system->CurrentProcess()->GetTitleID(); + const u64 title_id = system->GetCurrentProcessProgramID(); const auto screenshot_path = QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); const auto date = @@ -2886,8 +2946,7 @@ void GMainWindow::OnCaptureScreenshot() { } } #endif - render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(), - filename); + render_window->CaptureScreenshot(filename); } // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant @@ -2978,6 +3037,11 @@ void GMainWindow::UpdateStatusBar() { shader_building_label->setVisible(false); } + const auto res_info = Settings::values.resolution_info; + const auto res_scale = res_info.up_factor; + res_scale_label->setText( + tr("Scale: %1x", "%1 is the resolution scaling factor").arg(res_scale)); + if (Settings::values.use_speed_limit.GetValue()) { emu_speed_label->setText(tr("Speed: %1% / %2%") .arg(results.emulation_speed * 100.0, 0, 'f', 0) @@ -2993,6 +3057,7 @@ void GMainWindow::UpdateStatusBar() { } emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); + res_scale_label->setVisible(true); emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue()); game_fps_label->setVisible(true); emu_frametime_label->setVisible(true); @@ -3022,11 +3087,55 @@ void GMainWindow::UpdateGPUAccuracyButton() { } } +void GMainWindow::UpdateFilterText() { + const auto filter = Settings::values.scaling_filter.GetValue(); + switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + filter_status_button->setText(tr("NEAREST")); + break; + case Settings::ScalingFilter::Bilinear: + filter_status_button->setText(tr("BILINEAR")); + break; + case Settings::ScalingFilter::Bicubic: + filter_status_button->setText(tr("BICUBIC")); + break; + case Settings::ScalingFilter::Gaussian: + filter_status_button->setText(tr("GAUSSIAN")); + break; + case Settings::ScalingFilter::ScaleForce: + filter_status_button->setText(tr("SCALEFORCE")); + break; + case Settings::ScalingFilter::Fsr: + filter_status_button->setText(tr("AMD'S FIDELITYFX SR")); + break; + default: + filter_status_button->setText(tr("BILINEAR")); + break; + } +} + +void GMainWindow::UpdateAAText() { + const auto aa_mode = Settings::values.anti_aliasing.GetValue(); + switch (aa_mode) { + case Settings::AntiAliasing::Fxaa: + aa_status_button->setText(tr("FXAA")); + break; + case Settings::AntiAliasing::None: + aa_status_button->setText(tr("NO AA")); + break; + default: + aa_status_button->setText(tr("FXAA")); + break; + } +} + void GMainWindow::UpdateStatusButtons() { dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); UpdateGPUAccuracyButton(); + UpdateFilterText(); + UpdateAAText(); } void GMainWindow::UpdateUISettings() { @@ -3191,9 +3300,9 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { if (!errors.isEmpty()) { QMessageBox::warning( this, tr("Derivation Components Missing"), - tr("Components are missing that may hinder key derivation from completing. " + tr("Encryption keys are missing. " "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu " - "quickstart guide</a> to get all your keys and " + "quickstart guide</a> to get all your keys, firmware and " "games.<br><br><small>(%1)</small>") .arg(errors)); } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index aed15a0a0..24633ff2d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -150,7 +150,7 @@ signals: void ProfileSelectorFinishedSelection(std::optional<Common::UUID> uuid); void SoftwareKeyboardSubmitNormalText(Service::AM::Applets::SwkbdResult result, - std::u16string submitted_text); + std::u16string submitted_text, bool confirmed); void SoftwareKeyboardSubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type, std::u16string submitted_text, s32 cursor_position); @@ -302,6 +302,8 @@ private: void MigrateConfigFiles(); void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, std::string_view gpu_vendor = {}); + void UpdateFilterText(); + void UpdateAAText(); void UpdateStatusBar(); void UpdateGPUAccuracyButton(); void UpdateStatusButtons(); @@ -328,6 +330,7 @@ private: // Status bar elements QLabel* message_label = nullptr; QLabel* shader_building_label = nullptr; + QLabel* res_scale_label = nullptr; QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; @@ -335,6 +338,8 @@ private: QPushButton* gpu_accuracy_button = nullptr; QPushButton* renderer_status_button = nullptr; QPushButton* dock_status_button = nullptr; + QPushButton* filter_status_button = nullptr; + QPushButton* aa_status_button = nullptr; QTimer status_bar_update_timer; std::unique_ptr<Config> config; diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index cac19452f..936914ef3 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -68,7 +68,6 @@ struct Values { Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"}; Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"}; - Settings::BasicSetting<u16> screenshot_resolution_factor{0, "screenshot_resolution_factor"}; QString roms_path; QString symbols_path; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 0b8fde691..33241ea98 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -451,6 +451,9 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); + ReadSetting("Renderer", Settings::values.resolution_setup); + ReadSetting("Renderer", Settings::values.scaling_filter); + ReadSetting("Renderer", Settings::values.anti_aliasing); ReadSetting("Renderer", Settings::values.fullscreen_mode); ReadSetting("Renderer", Settings::values.aspect_ratio); ReadSetting("Renderer", Settings::values.max_anisotropy); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 339dca766..ecdc271a8 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -236,6 +236,29 @@ disable_shader_loop_safety_checks = # Which Vulkan physical device to use (defaults to 0) vulkan_device = +# 0: 0.5x (360p/540p) [EXPERIMENTAL] +# 1: 0.75x (540p/810p) [EXPERIMENTAL] +# 2 (default): 1x (720p/1080p) +# 3: 2x (1440p/2160p) +# 4: 3x (2160p/3240p) +# 5: 4x (2880p/4320p) +# 6: 5x (3600p/5400p) +# 7: 6x (4320p/6480p) +resolution_setup = + +# Pixel filter to use when up- or down-sampling rendered frames. +# 0: Nearest Neighbor +# 1 (default): Bilinear +# 2: Bicubic +# 3: Gaussian +# 4: ScaleForce +# 5: AMD FidelityFX™️ Super Resolution [Vulkan Only] +scaling_filter = + +# Anti-Aliasing (AA) +# 0 (default): None, 1: FXAA +anti_aliasing = + # Whether to use fullscreen or borderless window mode # 0 (Windows default): Borderless window, 1 (All other default): Exclusive fullscreen fullscreen_mode = diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 67587cc54..b44ea0cc4 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -28,7 +28,6 @@ #include "core/crypto/key_manager.h" #include "core/file_sys/registered_cache.h" #include "core/file_sys/vfs_real.h" -#include "core/hle/kernel/k_process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/loader.h" #include "core/telemetry_session.h" @@ -203,7 +202,7 @@ int main(int argc, char** argv) { if (Settings::values.use_disk_shader_cache.GetValue()) { system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), std::stop_token{}, + system.GetCurrentProcessProgramID(), std::stop_token{}, [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); } |