diff options
35 files changed, 445 insertions, 206 deletions
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 949384fd3..e40d117d6 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -18,6 +18,7 @@ #include "common/fs/fs_paths.h" #include "common/fs/path_util.h" #include "common/literals.h" +#include "common/thread.h" #include "common/logging/backend.h" #include "common/logging/log.h" diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..365488ba5 --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,140 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <deque> +#include <memory> +#include <type_traits> + +#include "common/common_types.h" + +namespace Common { + +template <class Traits> +class LeastRecentlyUsedCache { + using ObjectType = typename Traits::ObjectType; + using TickType = typename Traits::TickType; + + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = Build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + Attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + Detach(item); + Attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + Detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template <typename Func> + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v<std::invoke_result<Func, ObjectType>, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t Build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + auto& item = item_pool.emplace_back(); + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + + void Attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void Detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque<Item> item_pool; + std::deque<size_t> free_items; + Item* first_item{}; + Item* last_item{}; +}; + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 996315999..fd3b639cd 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,7 +59,6 @@ void LogSettings() { log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); - log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id.GetValue()); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) { values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); - values.use_caches_gc.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 20769d310..ec4d381e8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -475,7 +475,6 @@ struct Values { ShaderBackend::SPIRV, "shader_backend"}; Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; - Setting<bool> use_caches_gc{false, "use_caches_gc"}; Setting<u8> bg_red{0, "bg_red"}; Setting<u8> bg_green{0, "bg_green"}; diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 7e195346b..77efcabf0 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -21,34 +21,25 @@ namespace Core { CpuManager::CpuManager(System& system_) : system{system_} {} CpuManager::~CpuManager() = default; -void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { - cpu_manager.RunThread(core); +void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, + std::size_t core) { + cpu_manager.RunThread(stop_token, core); } void CpuManager::Initialize() { running_mode = true; if (is_multicore) { for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - core_data[core].host_thread = - std::make_unique<std::thread>(ThreadStart, std::ref(*this), core); + core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core); } } else { - core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0); + core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0); } } void CpuManager::Shutdown() { running_mode = false; Pause(false); - if (is_multicore) { - for (auto& data : core_data) { - data.host_thread->join(); - data.host_thread.reset(); - } - } else { - core_data[0].host_thread->join(); - core_data[0].host_thread.reset(); - } } std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() { @@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) { } } -void CpuManager::RunThread(std::size_t core) { +void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) { /// Initialization system.RegisterCoreThread(core); std::string name; @@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) { return; } + if (stop_token.stop_requested()) { + break; + } + auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); data.is_running = true; Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index 140263b09..9d92d4af0 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -78,9 +78,9 @@ private: void SingleCoreRunSuspendThread(); void SingleCorePause(bool paused); - static void ThreadStart(CpuManager& cpu_manager, std::size_t core); + static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core); - void RunThread(std::size_t core); + void RunThread(std::stop_token stop_token, std::size_t core); struct CoreData { std::shared_ptr<Common::Fiber> host_context; @@ -89,7 +89,7 @@ private: std::atomic<bool> is_running; std::atomic<bool> is_paused; std::atomic<bool> initialized; - std::unique_ptr<std::thread> host_thread; + std::jthread host_thread; }; std::atomic<bool> running_mode{}; diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 59ddf6298..b4c3a6099 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -9,17 +9,20 @@ #include "core/core.h" #include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/service/kernel_helpers.h" #include "core/hle/service/nvflinger/buffer_queue.h" namespace Service::NVFlinger { -BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_) - : id(id_), layer_id(layer_id_), buffer_wait_event{kernel} { - Kernel::KAutoObject::Create(std::addressof(buffer_wait_event)); - buffer_wait_event.Initialize("BufferQueue:WaitEvent"); +BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_, + KernelHelpers::ServiceContext& service_context_) + : id(id_), layer_id(layer_id_), service_context{service_context_} { + buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent"); } -BufferQueue::~BufferQueue() = default; +BufferQueue::~BufferQueue() { + service_context.CloseEvent(buffer_wait_event); +} void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) { ASSERT(slot < buffer_slots); @@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) .multi_fence = {}, }; - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, @@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult } free_buffers_condition.notify_one(); - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { @@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) { } free_buffers_condition.notify_one(); - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); } void BufferQueue::Connect() { @@ -169,7 +172,7 @@ void BufferQueue::Disconnect() { std::unique_lock lock{queue_sequence_mutex}; queue_sequence.clear(); } - buffer_wait_event.GetWritableEvent().Signal(); + buffer_wait_event->GetWritableEvent().Signal(); is_connect = false; free_buffers_condition.notify_one(); } @@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) { } Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() { - return buffer_wait_event.GetWritableEvent(); + return buffer_wait_event->GetWritableEvent(); } Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() { - return buffer_wait_event.GetReadableEvent(); + return buffer_wait_event->GetReadableEvent(); } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 61e337ac5..759247eb0 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -24,6 +24,10 @@ class KReadableEvent; class KWritableEvent; } // namespace Kernel +namespace Service::KernelHelpers { +class ServiceContext; +} // namespace Service::KernelHelpers + namespace Service::NVFlinger { constexpr u32 buffer_slots = 0x40; @@ -54,7 +58,8 @@ public: NativeWindowFormat = 2, }; - explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_); + explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_, + KernelHelpers::ServiceContext& service_context_); ~BufferQueue(); enum class BufferTransformFlags : u32 { @@ -130,12 +135,14 @@ private: std::list<u32> free_buffers; std::array<Buffer, buffer_slots> buffers; std::list<u32> queue_sequence; - Kernel::KEvent buffer_wait_event; + Kernel::KEvent* buffer_wait_event{}; std::mutex free_buffers_mutex; std::condition_variable free_buffers_condition; std::mutex queue_sequence_mutex; + + KernelHelpers::ServiceContext& service_context; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 941748970..00bff8caf 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() { } } -NVFlinger::NVFlinger(Core::System& system_) : system(system_) { - displays.emplace_back(0, "Default", system); - displays.emplace_back(1, "External", system); - displays.emplace_back(2, "Edid", system); - displays.emplace_back(3, "Internal", system); - displays.emplace_back(4, "Null", system); +NVFlinger::NVFlinger(Core::System& system_) + : system(system_), service_context(system_, "nvflinger") { + displays.emplace_back(0, "Default", service_context, system); + displays.emplace_back(1, "External", service_context, system); + displays.emplace_back(2, "Edid", service_context, system); + displays.emplace_back(3, "Internal", service_context, system); + displays.emplace_back(4, "Null", service_context, system); guard = std::make_shared<std::mutex>(); // Schedule the screen composition events @@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) { const u32 buffer_queue_id = next_buffer_queue_id++; buffer_queues.emplace_back( - std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id)); + std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context)); display.CreateLayer(layer_id, *buffer_queues.back()); } diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index d80fd07ef..6d84cafb4 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -15,6 +15,7 @@ #include <vector> #include "common/common_types.h" +#include "core/hle/service/kernel_helpers.h" namespace Common { class Event; @@ -135,6 +136,8 @@ private: std::unique_ptr<std::thread> vsync_thread; std::unique_ptr<Common::Event> wait_event; std::atomic<bool> is_running{}; + + KernelHelpers::ServiceContext service_context; }; } // namespace Service::NVFlinger diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index 0dd342dbf..b7705c02a 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -12,18 +12,21 @@ #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_writable_event.h" +#include "core/hle/service/kernel_helpers.h" #include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/layer/vi_layer.h" namespace Service::VI { -Display::Display(u64 id, std::string name_, Core::System& system) - : display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} { - Kernel::KAutoObject::Create(std::addressof(vsync_event)); - vsync_event.Initialize(fmt::format("Display VSync Event {}", id)); +Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, + Core::System& system_) + : display_id{id}, name{std::move(name_)}, service_context{service_context_} { + vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); } -Display::~Display() = default; +Display::~Display() { + service_context.CloseEvent(vsync_event); +} Layer& Display::GetLayer(std::size_t index) { return *layers.at(index); @@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const { } Kernel::KReadableEvent& Display::GetVSyncEvent() { - return vsync_event.GetReadableEvent(); + return vsync_event->GetReadableEvent(); } void Display::SignalVSyncEvent() { - vsync_event.GetWritableEvent().Signal(); + vsync_event->GetWritableEvent().Signal(); } void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) { diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 166f2a4cc..0979fc421 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -18,6 +18,9 @@ class KEvent; namespace Service::NVFlinger { class BufferQueue; } +namespace Service::KernelHelpers { +class ServiceContext; +} // namespace Service::KernelHelpers namespace Service::VI { @@ -31,10 +34,13 @@ class Display { public: /// Constructs a display with a given unique ID and name. /// - /// @param id The unique ID for this display. + /// @param id The unique ID for this display. + /// @param service_context_ The ServiceContext for the owning service. /// @param name_ The name for this display. + /// @param system_ The global system instance. /// - Display(u64 id, std::string name_, Core::System& system); + Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, + Core::System& system_); ~Display(); /// Gets the unique ID assigned to this display. @@ -98,9 +104,10 @@ public: private: u64 display_id; std::string name; + KernelHelpers::ServiceContext& service_context; std::vector<std::shared_ptr<Layer>> layers; - Kernel::KEvent vsync_event; + Kernel::KEvent* vsync_event{}; }; } // namespace Service::VI diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 67df46499..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -380,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), + ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 +20,7 @@ #include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass { public: TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::AbstractSyntaxList& syntax_list_) + IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public: IR::Block& first_block{*syntax_list.front().data.block}; IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); + if (uses_demote_to_helper && host_info.needs_demote_reorder) { + DemoteCombinationPass(); + } } private: @@ -809,7 +813,14 @@ private: } case StatementType::Return: { ensure_block(); - IR::IREmitter{*current_block}.Epilogue(); + IR::Block* return_block{block_pool.Create(inst_pool)}; + IR::IREmitter{*return_block}.Epilogue(); + current_block->AddBranch(return_block); + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = return_block; + current_block = nullptr; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; @@ -824,6 +835,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; merge.data.block = demote_block; + uses_demote_to_helper = true; break; } case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private: return block_pool.Create(inst_pool); } + void DemoteCombinationPass() { + using Type = IR::AbstractSyntaxNode::Type; + std::vector<IR::Block*> demote_blocks; + std::vector<IR::U1> demote_conds; + u32 num_epilogues{}; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : syntax_list) { + if (node.type == Type::If) { + ++branch_depth; + } + if (node.type == Type::EndIf) { + --branch_depth; + } + if (node.type != Type::Block) { + continue; + } + if (branch_depth > 1) { + // Skip reordering nested demote branches. + continue; + } + for (const IR::Inst& inst : node.data.block->Instructions()) { + const IR::Opcode op{inst.GetOpcode()}; + if (op == IR::Opcode::DemoteToHelperInvocation) { + demote_blocks.push_back(node.data.block); + break; + } + if (op == IR::Opcode::Epilogue) { + ++num_epilogues; + } + } + } + if (demote_blocks.size() == 0) { + return; + } + if (num_epilogues > 1) { + LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); + return; + } + s64 last_iterator_offset{}; + auto& asl{syntax_list}; + for (const IR::Block* demote_block : demote_blocks) { + const auto start_it{asl.begin() + last_iterator_offset}; + auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::If && asn.data.if_node.body == demote_block; + })}; + if (asl_it == asl.end()) { + // Demote without a conditional branch. + // No need to proceed since all fragment instances will be demoted regardless. + return; + } + const IR::Block* const end_if = asl_it->data.if_node.merge; + demote_conds.push_back(asl_it->data.if_node.cond); + last_iterator_offset = std::distance(asl.begin(), asl_it); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::Block && asn.data.block == demote_block; + }); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; + }); + asl_it = asl.erase(asl_it); + } + const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { + if (asn.type != Type::Block) { + return false; + } + for (const auto& inst : asn.data.block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return true; + } + } + return false; + }}; + const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; + const auto return_block_it{(reverse_it + 1).base()}; + + IR::IREmitter ir{*(return_block_it - 1)->data.block}; + IR::U1 cond(IR::Value(false)); + for (const auto& demote_cond : demote_conds) { + cond = ir.LogicalOr(cond, demote_cond); + } + cond.Inst()->DestructiveAddUsage(1); + + IR::AbstractSyntaxNode demote_if_node{}; + demote_if_node.type = Type::If; + demote_if_node.data.if_node.cond = cond; + demote_if_node.data.if_node.body = demote_blocks[0]; + demote_if_node.data.if_node.merge = return_block_it->data.block; + + IR::AbstractSyntaxNode demote_node{}; + demote_node.type = Type::Block; + demote_node.data.block = demote_blocks[0]; + + IR::AbstractSyntaxNode demote_endif_node{}; + demote_endif_node.type = Type::EndIf; + demote_endif_node.data.end_if.merge = return_block_it->data.block; + + asl.insert(return_block_it, demote_endif_node); + asl.insert(return_block_it, demote_node); + asl.insert(return_block_it, demote_if_node); + } + ObjectPool<Statement>& stmt_pool; ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Block>& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; + bool uses_demote_to_helper{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private: } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, + const HostTranslateInfo& host_info) { ObjectPool<Statement> stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace Maxwell { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { - bool support_float16{}; ///< True when the device supports 16-bit floats - bool support_int64{}; ///< True when the device supports 64-bit integers + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers + bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,14 @@ public: return words.size_bytes; } + size_t getLRUID() const noexcept { + return lru_id; + } + + void setLRUID(size_t lru_id_) { + lru_id = lru_id_; + } + private: template <Type type> u64* Array() noexcept { @@ -603,9 +601,9 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; + size_t lru_id = SIZE_MAX; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -330,7 +331,7 @@ private: template <bool insert> void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr<u8[]> immediate_buffer_alloc; - typename SlotVector<Buffer>::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); - } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> @@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + if (total_used_memory >= EXPECTED_MEMORY) { RunGarbageCollector(); } ++frame_tick; @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast<u32>(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { template <class P> template <bool insert> void BufferCache<P>::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.getLRUID()); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { } template <class P> -void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.getLRUID(), frame_tick); + } } template <class P> diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public: // These values are used by Nouveau and some games. AddGL = 0x8006, - SubtractGL = 0x8007, - ReverseSubtractGL = 0x8008, - MinGL = 0x800a, - MaxGL = 0x800b + MinGL = 0x8007, + MaxGL = 0x8008, + SubtractGL = 0x800a, + ReverseSubtractGL = 0x800b }; enum class Factor : u32 { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public: return shader_backend; } + bool IsAmd() const { + return vendor_name == "ATI Technologies Inc."; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo host_info{ .support_float16 = false, .support_int64 = device.HasShaderInt64(), + .needs_demote_reorder = device.IsAmd(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 5c43b8acf..cb0580182 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); - const size_t size_bytes = GetSizeInBytes(framebuffer); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel, + framebuffer.stride, framebuffer.height, + 1, block_height_log2, 0)}; Tegra::Texture::UnswizzleTexture( mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f316c4f92..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), + .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || + driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, }; } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = SIZE_MAX; std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@ #pragma once #include "common/alignment.h" -#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,70 +61,38 @@ template <class P> void TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> void TextureCache<P>::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + if (total_used_memory > minimum_memory) { RunGarbageCollector(); } sentenced_images.Tick(); @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector<ImageId> uncommitted_downloads; std::queue<std::vector<ImageId>> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector<Image>::Iterator deletion_iterator; }; } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c010b9353..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 const u32 unswizzled_offset = slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; - if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); - offset >= input.size()) { - // TODO(Rodrigo): This is an out of bounds access that should never happen. To - // avoid crashing the emulator, break. - ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size()); - break; - } - u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 377795326..85d292bcc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -818,7 +818,6 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); - ReadGlobalSetting(Settings::values.use_caches_gc); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() { Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); - WriteGlobalSetting(Settings::values.use_caches_gc); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..43f1887d1 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -156,7 +156,7 @@ <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> - <string>Use disk shader cache</string> + <string>Use disk pipeline cache</string> </property> </widget> </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a31b8e192..bfd464061 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); - ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); if (Settings::IsConfiguringGlobal()) { @@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, - use_caches_gc); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time, use_fast_gpu_time); } @@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); - ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); @@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { use_asynchronous_shaders); ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, Settings::values.use_fast_gpu_time, use_fast_gpu_time); - ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, - use_caches_gc); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 7356e6916..13ba4ff6b 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -37,5 +37,4 @@ private: ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; - ConfigurationShared::CheckState use_caches_gc; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4fe6b86ae..b91abc2f0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (hack)</string> + <string>Use asynchronous shader building (Hack)</string> </property> </widget> </item> @@ -92,17 +92,7 @@ <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> </property> <property name="text"> - <string>Use Fast GPU Time (hack)</string> - </property> - </widget> - </item> - <item> - <widget class="QCheckBox" name="use_caches_gc"> - <property name="toolTip"> - <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> - </property> - <property name="text"> - <string>Enable GPU cache garbage collection (experimental)</string> + <string>Use Fast GPU Time (Hack)</string> </property> </widget> </item> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index e97804220..f9d949e75 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location")); QAction* open_transferable_shader_cache = - context_menu.addAction(tr("Open Transferable Shader Cache")); + context_menu.addAction(tr("Open Transferable Pipeline Cache")); context_menu.addSeparator(); QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); - QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); - QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache")); remove_menu->addSeparator(); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524..757dd1ea0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -468,7 +468,6 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); - ReadSetting("Renderer", Settings::values.use_caches_gc); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green); |