diff options
43 files changed, 671 insertions, 431 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index 4d0ac72a5..b7b9259ec 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -112,8 +112,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, } struct System::Impl { explicit Impl(System& system) - : kernel{system}, fs_controller{system}, cpu_core_manager{system}, - applet_manager{system}, reporter{system} {} + : kernel{system}, fs_controller{system}, cpu_core_manager{system}, reporter{system}, + applet_manager{system} {} Cpu& CurrentCpuCore() { return cpu_core_manager.GetCurrentCore(); @@ -240,22 +240,27 @@ struct System::Impl { } void Shutdown() { - // Log last frame performance stats - const auto perf_results = GetAndResetPerfStats(); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", - perf_results.emulation_speed * 100.0); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", - perf_results.game_fps); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", - perf_results.frametime * 1000.0); - telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", - perf_stats->GetMeanFrametime()); + // Log last frame performance stats if game was loaded + if (perf_stats) { + const auto perf_results = GetAndResetPerfStats(); + telemetry_session->AddField(Telemetry::FieldType::Performance, + "Shutdown_EmulationSpeed", + perf_results.emulation_speed * 100.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", + perf_results.game_fps); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", + perf_results.frametime * 1000.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", + perf_stats->GetMeanFrametime()); + } lm_manager.Flush(); is_powered_on = false; exit_lock = false; + gpu_core->WaitIdle(); + // Shutdown emulation session renderer.reset(); 
GDBStub::Shutdown(); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 941ebc93a..3a32d5b41 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1140,8 +1140,9 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { - const auto backend = BCAT::CreateBackendFromSettings( - [this](u64 tid) { return system.GetFileSystemController().GetBCATDirectory(tid); }); + const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { + return system.GetFileSystemController().GetBCATDirectory(tid); + }); const auto build_id_full = system.GetCurrentProcessBuildID(); u64 build_id{}; std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp index 073d0f6fa..25a886238 100644 --- a/src/core/hle/service/apm/controller.cpp +++ b/src/core/hle/service/apm/controller.cpp @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> +#include <array> +#include <utility> + #include "common/logging/log.h" #include "core/core_timing.h" #include "core/hle/service/apm/controller.h" @@ -9,8 +13,7 @@ namespace Service::APM { -constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = - PerformanceConfiguration::Config7; +constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7; Controller::Controller(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing}, configs{ @@ -22,18 +25,35 @@ Controller::~Controller() = default; void Controller::SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config) { - static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ - {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, - {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, - {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, - {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, - {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, - {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, - {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, - {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, - }; - - SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); + static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{ + {PerformanceConfiguration::Config1, 1020}, + {PerformanceConfiguration::Config2, 1020}, + {PerformanceConfiguration::Config3, 1224}, + {PerformanceConfiguration::Config4, 1020}, + {PerformanceConfiguration::Config5, 1020}, + {PerformanceConfiguration::Config6, 1224}, + {PerformanceConfiguration::Config7, 1020}, + {PerformanceConfiguration::Config8, 1020}, + 
{PerformanceConfiguration::Config9, 1020}, + {PerformanceConfiguration::Config10, 1020}, + {PerformanceConfiguration::Config11, 1020}, + {PerformanceConfiguration::Config12, 1020}, + {PerformanceConfiguration::Config13, 1785}, + {PerformanceConfiguration::Config14, 1785}, + {PerformanceConfiguration::Config15, 1020}, + {PerformanceConfiguration::Config16, 1020}, + }}; + + const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(), + [config](const auto& entry) { return entry.first == config; }); + + if (iter == config_to_speed.cend()) { + LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", + static_cast<u32>(config)); + return; + } + + SetClockSpeed(iter->second); configs.insert_or_assign(mode, config); } @@ -48,7 +68,7 @@ void Controller::SetFromCpuBoostMode(CpuBoostMode mode) { BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); } -PerformanceMode Controller::GetCurrentPerformanceMode() { +PerformanceMode Controller::GetCurrentPerformanceMode() const { return Settings::values.use_docked_mode ? 
PerformanceMode::Docked : PerformanceMode::Handheld; } diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h index 454caa6eb..af0c4cd34 100644 --- a/src/core/hle/service/apm/controller.h +++ b/src/core/hle/service/apm/controller.h @@ -56,7 +56,7 @@ public: void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); void SetFromCpuBoostMode(CpuBoostMode mode); - PerformanceMode GetCurrentPerformanceMode(); + PerformanceMode GetCurrentPerformanceMode() const; PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); private: diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 9d6946bc5..b86fda29a 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -10,8 +10,8 @@ namespace Service::BCAT { -ProgressServiceBackend::ProgressServiceBackend(std::string_view event_name) { - auto& kernel{Core::System::GetInstance().Kernel()}; +ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel, + std::string_view event_name) { event = Kernel::WritableEvent::CreateEventPair( kernel, Kernel::ResetType::Automatic, std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h index 51dbd3316..ea4b16ad0 100644 --- a/src/core/hle/service/bcat/backend/backend.h +++ b/src/core/hle/service/bcat/backend/backend.h @@ -15,6 +15,14 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/result.h" +namespace Core { +class System; +} + +namespace Kernel { +class KernelCore; +} + namespace Service::BCAT { struct DeliveryCacheProgressImpl; @@ -88,7 +96,7 @@ public: void FinishDownload(ResultCode result); private: - explicit ProgressServiceBackend(std::string_view event_name); + explicit ProgressServiceBackend(Kernel::KernelCore& kernel, std::string_view 
event_name); Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const; DeliveryCacheProgressImpl& GetImpl(); @@ -145,6 +153,6 @@ public: std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override; }; -std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter); +std::unique_ptr<Backend> CreateBackendFromSettings(Core::System& system, DirectoryGetter getter); } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 64022982b..918159e11 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp @@ -104,14 +104,15 @@ std::string GetZIPFilePath(u64 title_id) { // If the error is something the user should know about (build ID mismatch, bad client version), // display an error. -void HandleDownloadDisplayResult(DownloadResult res) { +void HandleDownloadDisplayResult(const AM::Applets::AppletManager& applet_manager, + DownloadResult res) { if (res == DownloadResult::Success || res == DownloadResult::NoResponse || res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError || res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) { return; } - const auto& frontend{Core::System::GetInstance().GetAppletManager().GetAppletFrontendSet()}; + const auto& frontend{applet_manager.GetAppletFrontendSet()}; frontend.error->ShowCustomErrorText( ResultCode(-1), "There was an error while attempting to use Boxcat.", DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); @@ -264,12 +265,13 @@ private: u64 build_id; }; -Boxcat::Boxcat(DirectoryGetter getter) : Backend(std::move(getter)) {} +Boxcat::Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter) + : Backend(std::move(getter)), applet_manager{applet_manager_} {} Boxcat::~Boxcat() = default; -void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, - 
ProgressServiceBackend& progress, +void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGetter dir_getter, + TitleIDVersion title, ProgressServiceBackend& progress, std::optional<std::string> dir_name = {}) { progress.SetNeedHLELock(true); @@ -295,7 +297,7 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, FileUtil::Delete(zip_path); } - HandleDownloadDisplayResult(res); + HandleDownloadDisplayResult(applet_manager, res); progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE); return; } @@ -364,17 +366,24 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) { is_syncing.exchange(true); - std::thread([this, title, &progress] { SynchronizeInternal(dir_getter, title, progress); }) + + std::thread([this, title, &progress] { + SynchronizeInternal(applet_manager, dir_getter, title, progress); + }) .detach(); + return true; } bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name, ProgressServiceBackend& progress) { is_syncing.exchange(true); - std::thread( - [this, title, name, &progress] { SynchronizeInternal(dir_getter, title, progress, name); }) + + std::thread([this, title, name, &progress] { + SynchronizeInternal(applet_manager, dir_getter, title, progress, name); + }) .detach(); + return true; } @@ -420,7 +429,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title) FileUtil::Delete(path); } - HandleDownloadDisplayResult(res); + HandleDownloadDisplayResult(applet_manager, res); return std::nullopt; } } diff --git a/src/core/hle/service/bcat/backend/boxcat.h b/src/core/hle/service/bcat/backend/boxcat.h index 601151189..d65b42e58 100644 --- a/src/core/hle/service/bcat/backend/boxcat.h +++ b/src/core/hle/service/bcat/backend/boxcat.h @@ -9,6 +9,10 @@ #include <optional> #include "core/hle/service/bcat/backend/backend.h" +namespace Service::AM::Applets { +class 
AppletManager; +} + namespace Service::BCAT { struct EventStatus { @@ -20,12 +24,13 @@ struct EventStatus { /// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and /// doesn't require a switch or nintendo account. The content is controlled by the yuzu team. class Boxcat final : public Backend { - friend void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, + friend void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, + DirectoryGetter dir_getter, TitleIDVersion title, ProgressServiceBackend& progress, std::optional<std::string> dir_name); public: - explicit Boxcat(DirectoryGetter getter); + explicit Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter); ~Boxcat() override; bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; @@ -53,6 +58,7 @@ private: class Client; std::unique_ptr<Client> client; + AM::Applets::AppletManager& applet_manager; }; } // namespace Service::BCAT diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 4e4aa758b..6d9d1527d 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp @@ -125,7 +125,11 @@ private: class IBcatService final : public ServiceFramework<IBcatService> { public: explicit IBcatService(Core::System& system_, Backend& backend_) - : ServiceFramework("IBcatService"), system{system_}, backend{backend_} { + : ServiceFramework("IBcatService"), system{system_}, backend{backend_}, + progress{{ + ProgressServiceBackend{system_.Kernel(), "Normal"}, + ProgressServiceBackend{system_.Kernel(), "Directory"}, + }} { // clang-format off static const FunctionInfo functions[] = { {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, @@ -249,10 +253,7 @@ private: Core::System& system; Backend& backend; - std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress{ - 
ProgressServiceBackend{"Normal"}, - ProgressServiceBackend{"Directory"}, - }; + std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress; }; void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { @@ -557,12 +558,12 @@ void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId( rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id)); } -std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { - const auto backend = Settings::values.bcat_backend; - +std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system, + DirectoryGetter getter) { #ifdef YUZU_ENABLE_BOXCAT - if (backend == "boxcat") - return std::make_unique<Boxcat>(std::move(getter)); + if (Settings::values.bcat_backend == "boxcat") { + return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter)); + } #endif return std::make_unique<NullBackend>(std::move(getter)); @@ -571,7 +572,8 @@ std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_, FileSystem::FileSystemController& fsc_, const char* name) : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, - backend{CreateBackendFromSettings([&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, + backend{CreateBackendFromSettings(system_, + [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, system{system_} {} Module::Interface::~Interface() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/core_timing.h" #include 
"core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/perf_stats.h" @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 transform, crop_rect}; system.GetPerfStats().EndGameFrame(); + system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); + system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index eb88fee1b..b27ee0502 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::BadParameter; } + u32 event_id = params.value & 0x00FF; + + if (event_id >= MaxNvEvents) { + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::BadParameter; + } + + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); // This is mostly to take into account unimplemented features. As synced // gpu is always synced. 
if (!gpu.IsAsync()) { + event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { + event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), &params, sizeof(params)); return NvResult::Success; @@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& return NvResult::Timeout; } - u32 event_id; - if (is_async) { - event_id = params.value & 0x00FF; - if (event_id >= MaxNvEvents) { - std::memcpy(output.data(), &params, sizeof(params)); - return NvResult::BadParameter; - } - } else { - if (ctrl.fresh_call) { - const auto result = events_interface.GetFreeEvent(); - if (result) { - event_id = *result; - } else { - LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); - event_id = params.value & 0x00FF; - } - } else { - event_id = ctrl.event_id; - } - } - EventState status = events_interface.status[event_id]; if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { events_interface.SetEventStatus(event_id, EventState::Waiting); @@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - events_interface.events[event_id].writable->Clear(); + event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3, 1}; rb.Push(RESULT_SUCCESS); if (event_id 
< MaxNvEvents) { - rb.PushCopyObjects(nvdrv->GetEvent(event_id)); + auto event = nvdrv->GetEvent(event_id); + event->Clear(); + rb.PushCopyObjects(event); rb.Push<u32>(NvResult::Success); } else { rb.Push<u32>(0); diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Automatic, event_label); + events_interface.events[i] = + Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index e1a07d3ee..55b68eb0c 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -14,8 +14,8 @@ namespace Service::NVFlinger { -BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { - auto& kernel = Core::System::GetInstance().Kernel(); +BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id) + : id(id), layer_id(layer_id) { buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, "BufferQueue NativeHandle"); } diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 356bedb81..8f9b18547 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -15,6 +15,10 @@ #include "core/hle/kernel/writable_event.h" #include "core/hle/service/nvdrv/nvdata.h" +namespace Kernel { +class 
KernelCore; +} + namespace Service::NVFlinger { struct IGBPBuffer { @@ -44,7 +48,7 @@ public: NativeWindowFormat = 2, }; - BufferQueue(u32 id, u64 layer_id); + explicit BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id); ~BufferQueue(); enum class BufferTransformFlags : u32 { diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b9..cc9522aad 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -83,7 +83,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { const u64 layer_id = next_layer_id++; const u32 buffer_queue_id = next_buffer_queue_id++; - buffer_queues.emplace_back(buffer_queue_id, layer_id); + buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); display->CreateLayer(layer_id, buffer_queues.back()); return layer_id; } @@ -187,14 +187,18 @@ void NVFlinger::Compose() { MicroProfileFlip(); if (!buffer) { - // There was no queued buffer to draw, render previous frame - system.GetPerfStats().EndGameFrame(); - system.GPU().SwapBuffers({}); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; + const auto& gpu = system.GPU(); + const auto& multi_fence = buffer->get().multi_fence; + for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { + const auto& fence = multi_fence.fences[fence_id]; + gpu.WaitFence(fence.id, fence.value); + } + // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. 
The display device selection is probably based // on which display we're drawing (Default, Internal, External, etc) diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b56cb0627..10821d452 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -22,7 +22,7 @@ constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(const Core::System& system, const CheatProcessMetadata& metadata) - : system(system), metadata(metadata) {} + : metadata(metadata), system(system) {} StandardVmCallbacks::~StandardVmCallbacks() = default; @@ -176,9 +176,8 @@ std::vector<CheatEntry> TextCheatParser::Parse(const Core::System& system, CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats, const std::array<u8, 0x20>& build_id) - : system{system}, core_timing{system.CoreTiming()}, vm{std::make_unique<StandardVmCallbacks>( - system, metadata)}, - cheats(std::move(cheats)) { + : vm{std::make_unique<StandardVmCallbacks>(system, metadata)}, + cheats(std::move(cheats)), core_timing{system.CoreTiming()}, system{system} { metadata.main_nso_build_id = build_id; } diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index cc16d15a4..4f4fa5099 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp @@ -1133,8 +1133,8 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { case SaveRestoreRegisterOpType::ClearRegs: case SaveRestoreRegisterOpType::Restore: default: - src = registers.data(); - dst = saved_values.data(); + src = saved_values.data(); + dst = registers.data(); break; } for (std::size_t i = 0; i < NumRegisters; i++) { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7802fd808..59976943a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -101,7 +101,8 @@ void Maxwell3D::InitializeRegisterDefaults() { #define 
DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const u32 start, const u32 range, const u8 position) { + const auto set_block = [this](const std::size_t start, const std::size_t range, + const u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); @@ -478,7 +479,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); + block.timestamp = system.CoreTiming().GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,12 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 
+ void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include 
"video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - 
void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; @@ -121,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); @@ -128,7 +126,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index dbaeac6db..42031d80a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -11,6 +11,77 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { +namespace { +enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, +}; +} // Anonymous namespace + +enum class MacroInterpreter::ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. 
+ Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 +}; + +enum class MacroInterpreter::ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 +}; + +enum class MacroInterpreter::BranchCondition : u32 { + Zero = 0, + NotZero = 1, +}; + +union MacroInterpreter::Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + // If set on a branch, then the branch doesn't have a delay slot. + BitField<5, 1, u32> branch_annul; + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + + s32 GetBranchTarget() const { + return static_cast<s32>(immediate * sizeof(u32)); + } +}; MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 76b6a895b..631146d89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -6,7 +6,6 @@ #include <array> #include <optional> -#include <vector> #include "common/bit_field.h" #include "common/common_types.h" @@ -28,75 +27,11 @@ public: void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); private: - enum class Operation : u32 { - ALU = 0, - AddImmediate = 1, - ExtractInsert = 2, - ExtractShiftLeftImmediate = 3, - ExtractShiftLeftRegister = 4, - Read = 5, - Unused = 6, // This 
operation doesn't seem to be a valid encoding. - Branch = 7, - }; - - enum class ALUOperation : u32 { - Add = 0, - AddWithCarry = 1, - Subtract = 2, - SubtractWithBorrow = 3, - // Operations 4-7 don't seem to be valid encodings. - Xor = 8, - Or = 9, - And = 10, - AndNot = 11, - Nand = 12 - }; - - enum class ResultOperation : u32 { - IgnoreAndFetch = 0, - Move = 1, - MoveAndSetMethod = 2, - FetchAndSend = 3, - MoveAndSend = 4, - FetchAndSetMethod = 5, - MoveAndSetMethodFetchAndSend = 6, - MoveAndSetMethodSend = 7 - }; + enum class ALUOperation : u32; + enum class BranchCondition : u32; + enum class ResultOperation : u32; - enum class BranchCondition : u32 { - Zero = 0, - NotZero = 1, - }; - - union Opcode { - u32 raw; - BitField<0, 3, Operation> operation; - BitField<4, 3, ResultOperation> result_operation; - BitField<4, 1, BranchCondition> branch_condition; - BitField<5, 1, u32> - branch_annul; // If set on a branch, then the branch doesn't have a delay slot. - BitField<7, 1, u32> is_exit; - BitField<8, 3, u32> dst; - BitField<11, 3, u32> src_a; - BitField<14, 3, u32> src_b; - // The signed immediate overlaps the second source operand and the alu operation. - BitField<14, 18, s32> immediate; - - BitField<17, 5, ALUOperation> alu_operation; - - // Bitfield instructions data - BitField<17, 5, u32> bf_src_bit; - BitField<22, 5, u32> bf_size; - BitField<27, 5, u32> bf_dst_bit; - - u32 GetBitfieldMask() const { - return (1 << bf_size) - 1; - } - - s32 GetBranchTarget() const { - return static_cast<s32>(immediate * sizeof(u32)); - } - }; + union Opcode; union MethodAddress { u32 raw; @@ -149,9 +84,10 @@ private: Engines::Maxwell3D& maxwell3d; - u32 pc; ///< Current program counter - std::optional<u32> - delayed_pc; ///< Program counter to execute at after the delay slot is executed. + /// Current program counter + u32 pc; + /// Program counter to execute at after the delay slot is executed. 
+ std::optional<u32> delayed_pc; static constexpr std::size_t NumMacroRegisters = 8; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index ab71870ab..fe5f08ace 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::DXT23_SRGB>, MortonCopy<true, PixelFormat::DXT45_SRGB>, MortonCopy<true, PixelFormat::BC7U_SRGB>, + MortonCopy<true, PixelFormat::R4G4B4A4U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, @@ -101,6 +102,16 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_10X8>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, @@ -162,6 +173,17 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::DXT23_SRGB>, MortonCopy<false, PixelFormat::DXT45_SRGB>, MortonCopy<false, PixelFormat::BC7U_SRGB>, + MortonCopy<false, PixelFormat::R4G4B4A4U>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 
a85f730a8..cbcf81414 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include <cstddef> #include <map> #include <memory> +#include <mutex> #include <optional> #include <tuple> #include <utility> @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map<u64, int>; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bb972bf37..baec66ff0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1148,7 +1148,7 @@ private: for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); - } else if (std::get_if<TextureAoffi>(&variant)) { + } else if (std::holds_alternative<TextureAoffi>(variant)) { expr += GenerateTextureAoffi(meta->aoffi); } else { UNREACHABLE(); @@ -1158,8 +1158,8 @@ private: return expr + ')'; } - std::string GenerateTextureArgument(TextureArgument argument) { - const auto [type, operand] = argument; + std::string GenerateTextureArgument(const TextureArgument& argument) { + const auto& [type, operand] = argument; if (operand == nullptr) { return 
{}; } @@ -1235,7 +1235,7 @@ private: std::string BuildImageValues(Operation operation) { constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; const std::size_t values_count{meta.values.size()}; std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); @@ -1780,14 +1780,14 @@ private: return {"0", Type::Int}; } - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), Type::Uint}; } Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), BuildIntegerCoordinates(operation), BuildImageValues(operation)); return {}; @@ -1795,7 +1795,7 @@ private: template <const std::string_view& opname> Expression AtomicImage(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; ASSERT(meta.values.size() == 1); return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), @@ -2246,7 +2246,7 @@ private: code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); } - std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { + std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { return fmt::format("{}_{}_{}", name, index, suffix); } @@ -2271,17 +2271,15 @@ private: ShaderWriter code; }; -static constexpr std::string_view flow_var = "flow_var_"; - std::string GetFlowVariable(u32 i) { - return fmt::format("{}{}", flow_var, i); + return fmt::format("flow_var_{}", i); } class ExprDecompiler { public: explicit 
ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ExprAnd& expr) { + void operator()(const ExprAnd& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " && "; @@ -2289,7 +2287,7 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprOr& expr) { + void operator()(const ExprOr& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " || "; @@ -2297,17 +2295,17 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprNot& expr) { + void operator()(const ExprNot& expr) { inner += '!'; std::visit(*this, *expr.operand1); } - void operator()(VideoCommon::Shader::ExprPredicate& expr) { + void operator()(const ExprPredicate& expr) { const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); inner += decomp.GetPredicate(pred); } - void operator()(VideoCommon::Shader::ExprCondCode& expr) { + void operator()(const ExprCondCode& expr) { const Node cc = decomp.ir.GetConditionCode(expr.cc); std::string target; @@ -2332,15 +2330,15 @@ public: inner += target; } - void operator()(VideoCommon::Shader::ExprVar& expr) { + void operator()(const ExprVar& expr) { inner += GetFlowVariable(expr.var_index); } - void operator()(VideoCommon::Shader::ExprBoolean& expr) { + void operator()(const ExprBoolean& expr) { inner += expr.value ? 
"true" : "false"; } - std::string& GetResult() { + const std::string& GetResult() const { return inner; } @@ -2353,7 +2351,7 @@ class ASTDecompiler { public: explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ASTProgram& ast) { + void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -2361,7 +2359,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTIfThen& ast) { + void operator()(const ASTIfThen& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); @@ -2375,7 +2373,7 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTIfElse& ast) { + void operator()(const ASTIfElse& ast) { decomp.code.AddLine("else {{"); decomp.code.scope++; ASTNode current = ast.nodes.GetFirst(); @@ -2387,29 +2385,29 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { + void operator()(const ASTBlockDecoded& ast) { decomp.VisitBlock(ast.nodes); } - void operator()(VideoCommon::Shader::ASTVarSet& ast) { + void operator()(const ASTVarSet& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTLabel& ast) { + void operator()(const ASTLabel& ast) { decomp.code.AddLine("// Label_{}:", ast.index); } - void operator()(VideoCommon::Shader::ASTGoto& ast) { + void operator()([[maybe_unused]] const ASTGoto& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTDoWhile& ast) { + void operator()(const ASTDoWhile& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); 
decomp.code.AddLine("do {{"); @@ -2423,7 +2421,7 @@ public: decomp.code.AddLine("}} while({});", expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTReturn& ast) { + void operator()(const ASTReturn& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2443,7 +2441,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTBreak& ast) { + void operator()(const ASTBreak& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2458,7 +2456,7 @@ public: } } - void Visit(VideoCommon::Shader::ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -2471,9 +2469,9 @@ void GLSLDecompiler::DecompileAST() { for (u32 i = 0; i < num_flow_variables; i++) { code.AddLine("bool {} = false;", GetFlowVariable(i)); } + ASTDecompiler decompiler{*this}; - VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); - decompiler.Visit(program); + decompiler.Visit(ir.GetASTProgram()); } } // Anonymous namespace diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 173b76c4e..2f9bfd7e4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -111,7 +111,8 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45_SRGB {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U_SRGB + true}, // BC7U_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, 
ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB @@ -120,6 +121,16 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB // Depth formats {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const 
Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 4fb1ca372..0d943a826 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1648,32 +1648,32 @@ class ExprDecompiler { public: explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} - Id operator()(VideoCommon::Shader::ExprAnd& expr) { + Id operator()(const ExprAnd& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); const Id op2 = Visit(expr.operand2); return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); } - Id operator()(VideoCommon::Shader::ExprOr& expr) { + Id operator()(const ExprOr& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); const Id op2 = Visit(expr.operand2); return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); } - Id operator()(VideoCommon::Shader::ExprNot& expr) { + Id operator()(const ExprNot& expr) { const Id type_def = decomp.GetTypeDefinition(Type::Bool); const Id op1 = Visit(expr.operand1); return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); } - Id operator()(VideoCommon::Shader::ExprPredicate& expr) { + Id operator()(const ExprPredicate& expr) { const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); return decomp.Emit(decomp.OpLoad(decomp.t_bool, 
decomp.predicates.at(pred))); } - Id operator()(VideoCommon::Shader::ExprCondCode& expr) { + Id operator()(const ExprCondCode& expr) { const Node cc = decomp.ir.GetConditionCode(expr.cc); Id target; @@ -1696,15 +1696,15 @@ public: return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); } - Id operator()(VideoCommon::Shader::ExprVar& expr) { + Id operator()(const ExprVar& expr) { return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); } - Id operator()(VideoCommon::Shader::ExprBoolean& expr) { + Id operator()(const ExprBoolean& expr) { return expr.value ? decomp.v_true : decomp.v_false; } - Id Visit(VideoCommon::Shader::Expr& node) { + Id Visit(const Expr& node) { return std::visit(*this, *node); } @@ -1716,7 +1716,7 @@ class ASTDecompiler { public: explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ASTProgram& ast) { + void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -1724,7 +1724,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTIfThen& ast) { + void operator()(const ASTIfThen& ast) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); const Id then_label = decomp.OpLabel(); @@ -1741,33 +1741,33 @@ public: decomp.Emit(endif_label); } - void operator()(VideoCommon::Shader::ASTIfElse& ast) { + void operator()([[maybe_unused]] const ASTIfElse& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { + void operator()(const ASTBlockDecoded& ast) { decomp.VisitBasicBlock(ast.nodes); } - void operator()(VideoCommon::Shader::ASTVarSet& ast) { + void operator()(const ASTVarSet& ast) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); 
decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); } - void operator()(VideoCommon::Shader::ASTLabel& ast) { + void operator()([[maybe_unused]] const ASTLabel& ast) { // Do nothing } - void operator()(VideoCommon::Shader::ASTGoto& ast) { + void operator()([[maybe_unused]] const ASTGoto& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTDoWhile& ast) { + void operator()(const ASTDoWhile& ast) { const Id loop_label = decomp.OpLabel(); const Id endloop_label = decomp.OpLabel(); const Id loop_start_block = decomp.OpLabel(); @@ -1790,7 +1790,7 @@ public: decomp.Emit(endloop_label); } - void operator()(VideoCommon::Shader::ASTReturn& ast) { + void operator()(const ASTReturn& ast) { if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); @@ -1820,7 +1820,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTBreak& ast) { + void operator()(const ASTBreak& ast) { if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { ExprDecompiler expr_parser{decomp}; const Id condition = expr_parser.Visit(ast.condition); @@ -1840,7 +1840,7 @@ public: } } - void Visit(VideoCommon::Shader::ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -1856,9 +1856,11 @@ void SPIRVDecompiler::DecompileAST() { Name(id, fmt::format("flow_var_{}", i)); flow_variables.emplace(i, AddGlobalVariable(id)); } + + const ASTNode program = ir.GetASTProgram(); ASTDecompiler decompiler{*this}; - VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); decompiler.Visit(program); + const Id next_block = OpLabel(); Emit(OpBranch(next_block)); Emit(next_block); diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 436d45f4b..e43aecc18 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp @@ -3,6 +3,9 @@ // Refer to the license.txt file included. 
#include <string> +#include <string_view> + +#include <fmt/format.h> #include "common/assert.h" #include "common/common_types.h" @@ -229,7 +232,8 @@ public: return inner; } - std::string inner{}; +private: + std::string inner; }; class ASTPrinter { @@ -249,7 +253,7 @@ public: void operator()(const ASTIfThen& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "if (" + expr_parser.GetResult() + ") {\n"; + inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -257,11 +261,13 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "}\n"; + inner += fmt::format("{}}}\n", Indent()); } void operator()(const ASTIfElse& ast) { - inner += Ident() + "else {\n"; + inner += Indent(); + inner += "else {\n"; + scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -269,40 +275,41 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "}\n"; + + inner += Indent(); + inner += "}\n"; } void operator()(const ASTBlockEncoded& ast) { - inner += Ident() + "Block(" + std::to_string(ast.start) + ", " + std::to_string(ast.end) + - ");\n"; + inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); } - void operator()(const ASTBlockDecoded& ast) { - inner += Ident() + "Block;\n"; + void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { + inner += Indent(); + inner += "Block;\n"; } void operator()(const ASTVarSet& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += - Ident() + "V" + std::to_string(ast.index) + " := " + expr_parser.GetResult() + ";\n"; + inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); } void operator()(const ASTLabel& ast) { - inner += "Label_" + std::to_string(ast.index) + ":\n"; + inner += fmt::format("Label_{}:\n", ast.index); } void operator()(const ASTGoto& ast) { ExprPrinter expr_parser{}; 
std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> goto Label_" + - std::to_string(ast.label) + ";\n"; + inner += + fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); } void operator()(const ASTDoWhile& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "do {\n"; + inner += fmt::format("{}do {{\n", Indent()); scope++; ASTNode current = ast.nodes.GetFirst(); while (current) { @@ -310,32 +317,23 @@ public: current = current->GetNext(); } scope--; - inner += Ident() + "} while (" + expr_parser.GetResult() + ");\n"; + inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); } void operator()(const ASTReturn& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> " + - (ast.kills ? "discard" : "exit") + ";\n"; + inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), + ast.kills ? "discard" : "exit"); } void operator()(const ASTBreak& ast) { ExprPrinter expr_parser{}; std::visit(expr_parser, *ast.condition); - inner += Ident() + "(" + expr_parser.GetResult() + ") -> break;\n"; + inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); } - std::string& Ident() { - if (memo_scope == scope) { - return tabs_memo; - } - tabs_memo = tabs.substr(0, scope * 2); - memo_scope = scope; - return tabs_memo; - } - - void Visit(ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -344,16 +342,29 @@ public: } private: + std::string_view Indent() { + if (space_segment_scope == scope) { + return space_segment; + } + + // Ensure that we don't exceed our view. 
+ ASSERT(scope * 2 < spaces.size()); + + space_segment = spaces.substr(0, scope * 2); + space_segment_scope = scope; + return space_segment; + } + std::string inner{}; - u32 scope{}; + std::string_view space_segment; - std::string tabs_memo{}; - u32 memo_scope{}; + u32 scope{}; + u32 space_segment_scope{}; - static constexpr std::string_view tabs{" "}; + static constexpr std::string_view spaces{" "}; }; -std::string ASTManager::Print() { +std::string ASTManager::Print() const { ASTPrinter printer{}; printer.Visit(main_node); return printer.GetResult(); @@ -549,13 +560,13 @@ bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) co return min->GetParent() == max->GetParent(); } -void ASTManager::ShowCurrentState(std::string_view state) { +void ASTManager::ShowCurrentState(std::string_view state) const { LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); SanityCheck(); } -void ASTManager::SanityCheck() { - for (auto& label : labels) { +void ASTManager::SanityCheck() const { + for (const auto& label : labels) { if (!label->GetParent()) { LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); } diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index d7bf11821..a2f0044ba 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -328,13 +328,13 @@ public: void InsertReturn(Expr condition, bool kills); - std::string Print(); + std::string Print() const; void Decompile(); - void ShowCurrentState(std::string_view state); + void ShowCurrentState(std::string_view state) const; - void SanityCheck(); + void SanityCheck() const; void Clear(); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 268d1aed0..9d21f45de 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -473,8 +473,8 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -std::unique_ptr<ShaderCharacteristics> ScanFlow(const 
ProgramCode& program_code, u32 program_size, - u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address, const CompilerSettings& settings) { auto result_out = std::make_unique<ShaderCharacteristics>(); if (settings.depth == CompileDepth::BruteForce) { diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 74e54a5c7..37e987d62 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -76,8 +76,8 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address, const CompilerSettings& settings); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..447fb5c1d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -410,7 +410,7 @@ public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, meta, std::vector<Node>{}) {} + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} explicit OperationNode(OperationCode code, std::vector<Node> operands) : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..9a3c05288 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -212,6 +212,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, break; } break; + case Tegra::Texture::TextureFormat::A4B4G4R4: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R4G4B4A4U; + default: + break; + } + 
break; case Tegra::Texture::TextureFormat::R8: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: @@ -252,6 +260,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, default: break; } + break; case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -350,6 +359,16 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; case Tegra::Texture::TextureFormat::ASTC_2D_10X8: return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; + case Tegra::Texture::TextureFormat::ASTC_2D_6X6: + return is_srgb ? PixelFormat::ASTC_2D_6X6_SRGB : PixelFormat::ASTC_2D_6X6; + case Tegra::Texture::TextureFormat::ASTC_2D_10X10: + return is_srgb ? PixelFormat::ASTC_2D_10X10_SRGB : PixelFormat::ASTC_2D_10X10; + case Tegra::Texture::TextureFormat::ASTC_2D_12X12: + return is_srgb ? PixelFormat::ASTC_2D_12X12_SRGB : PixelFormat::ASTC_2D_12X12; + case Tegra::Texture::TextureFormat::ASTC_2D_8X6: + return is_srgb ? PixelFormat::ASTC_2D_8X6_SRGB : PixelFormat::ASTC_2D_8X6; + case Tegra::Texture::TextureFormat::ASTC_2D_6X5: + return is_srgb ? 
PixelFormat::ASTC_2D_6X5_SRGB : PixelFormat::ASTC_2D_6X5; case Tegra::Texture::TextureFormat::R16_G16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -510,6 +529,16 @@ bool IsPixelFormatASTC(PixelFormat format) { case PixelFormat::ASTC_2D_8X5_SRGB: case PixelFormat::ASTC_2D_10X8: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; @@ -530,6 +559,11 @@ bool IsPixelFormatSRGB(PixelFormat format) { case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1e1c432a5..97668f802 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -67,27 +67,38 @@ enum class PixelFormat { DXT23_SRGB = 49, DXT45_SRGB = 50, BC7U_SRGB = 51, - ASTC_2D_4X4_SRGB = 52, - ASTC_2D_8X8_SRGB = 53, - ASTC_2D_8X5_SRGB = 54, - ASTC_2D_5X4_SRGB = 55, - ASTC_2D_5X5 = 56, - ASTC_2D_5X5_SRGB = 57, - ASTC_2D_10X8 = 58, - ASTC_2D_10X8_SRGB = 59, + R4G4B4A4U = 52, + ASTC_2D_4X4_SRGB = 53, + ASTC_2D_8X8_SRGB = 54, + ASTC_2D_8X5_SRGB = 55, + ASTC_2D_5X4_SRGB = 56, + ASTC_2D_5X5 = 57, + ASTC_2D_5X5_SRGB = 58, + ASTC_2D_10X8 = 59, + ASTC_2D_10X8_SRGB = 60, + ASTC_2D_6X6 = 61, + ASTC_2D_6X6_SRGB = 62, + ASTC_2D_10X10 = 63, + ASTC_2D_10X10_SRGB = 64, + ASTC_2D_12X12 = 65, + ASTC_2D_12X12_SRGB = 66, + ASTC_2D_8X6 = 67, + ASTC_2D_8X6_SRGB = 68, + 
ASTC_2D_6X5 = 69, + ASTC_2D_6X5_SRGB = 70, MaxColorFormat, // Depth formats - Z32F = 60, - Z16 = 61, + Z32F = 71, + Z16 = 72, MaxDepthFormat, // DepthStencil formats - Z24S8 = 62, - S8Z24 = 63, - Z32FS8 = 64, + Z24S8 = 73, + S8Z24 = 74, + Z32FS8 = 75, MaxDepthStencilFormat, @@ -177,6 +188,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // DXT23_SRGB 2, // DXT45_SRGB 2, // BC7U_SRGB + 0, // R4G4B4A4U 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB @@ -185,6 +197,16 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // ASTC_2D_5X5_SRGB 2, // ASTC_2D_10X8 2, // ASTC_2D_10X8_SRGB + 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_SRGB + 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_SRGB + 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_SRGB + 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_SRGB + 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_SRGB 0, // Z32F 0, // Z16 0, // Z24S8 @@ -261,6 +283,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 4, // DXT23_SRGB 4, // DXT45_SRGB 4, // BC7U_SRGB + 1, // R4G4B4A4U 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB @@ -269,6 +292,16 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 5, // ASTC_2D_5X5_SRGB 10, // ASTC_2D_10X8 10, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_SRGB + 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_SRGB + 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_SRGB 1, // Z32F 1, // Z16 1, // Z24S8 @@ -285,71 +318,82 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, 
// DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 5, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 - 8, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // RGBX16F + 1, // R32UI + 8, // ASTC_2D_8X8 + 5, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 1, // R4G4B4A4U + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 5, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // 
ASTC_2D_12X12_SRGB + 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_SRGB + 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_SRGB + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -413,6 +457,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // DXT23_SRGB 128, // DXT45_SRGB 128, // BC7U + 16, // R4G4B4A4U 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB @@ -421,6 +466,16 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // ASTC_2D_5X5_SRGB 128, // ASTC_2D_10X8 128, // ASTC_2D_10X8_SRGB + 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_SRGB + 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_SRGB + 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_SRGB + 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_SRGB + 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_SRGB 32, // Z32F 16, // Z16 32, // Z24S8 @@ -504,6 +559,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Compressed, // DXT23_SRGB SurfaceCompression::Compressed, // DXT45_SRGB SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::None, // R4G4B4A4U SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB @@ -512,6 +568,16 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB SurfaceCompression::Converted, // ASTC_2D_10X8 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X6 + SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X10 + SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB + SurfaceCompression::Converted, // ASTC_2D_12X12 + SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X6 + SurfaceCompression::Converted, // 
ASTC_2D_8X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X5 + SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB SurfaceCompression::None, // Z32F SurfaceCompression::None, // Z16 SurfaceCompression::None, // Z24S8 diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ca2da8f97..6a92b22d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -62,10 +62,10 @@ public: } } - /*** - * `Guard` guarantees that rendertargets don't unregister themselves if the + /** + * Guarantees that rendertargets don't unregister themselves if they * collide. Protection is currently only done on 3D slices. - ***/ + */ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -287,7 +287,7 @@ protected: const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a sepparate buffer. + // and reading it from a separate buffer. virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { @@ -386,12 +386,13 @@ private: }; /** - * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Takes care of selecting a proper strategy to deal with a texture recycle. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. 
+ * @param untopological Indicates to the recycler that the texture has no way + * to match the overlaps due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { @@ -402,7 +403,7 @@ private: if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } - for (auto s : overlaps) { + for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -419,16 +420,19 @@ private: } /** - * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in - *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the - *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the - *new surface from that data. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Used to decide what to do with textures we can't resolve in the cache. It has 2 implemented + * strategies: Ignore and Flush. + * + * - Ignore: Just unregisters all the overlaps and loads the new texture. + * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters for the new surface. + * @param gpu_addr The starting address of the new surface. 
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or left + * blank. + * @param untopological Indicates to the recycler that the texture has no way to match the + * overlaps due to topological reasons. **/ std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, @@ -465,10 +469,12 @@ private: } /** - * `RebuildSurface` this method takes a single surface and recreates into another that - * may differ in format, target or width alingment. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we'll use to recreate the surface. + * Takes a single surface and recreates it into another that may differ in + * format, target or width alignment. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we'll use to recreate the surface. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -502,12 +508,14 @@ private: } /** - * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's - * params if it's an exact match, we return the main view of the registered surface. If it's - * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * Takes a single surface and checks with the new surface's params if it's an exact + * match, we return the main view of the registered surface. If its formats don't + * match, we rebuild the surface. We call this last method a `Mirage`. If formats * match but the targets don't, we create an overview View of the registered surface. - * @param current_surface, the registered surface in the cache which we want to convert. 
- * @param params, the new surface params which we want to check. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we want to check. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -529,13 +537,14 @@ private: } /** - * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface - * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps - * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface - * for them, else we return nothing. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. + * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate + * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps + * of the new surface, if they all match we end up recreating a surface for them, + * else we return nothing. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. 
**/ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -575,7 +584,7 @@ private: } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } - for (auto surface : overlaps) { + for (const auto& surface : overlaps) { Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); @@ -584,19 +593,27 @@ private: } /** - * `GetSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps. The first is to - * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from - * memory else we move to step 3. Step 3 consists on figuring the relationship between the - * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many - * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to - * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface - * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. + * Gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps: + * + * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * + * 2. 
Check if there are any overlaps at all, if there are none, we just load the texture from + * memory else we move to step 3. + * + * 3. Consists of figuring out the relationship between the candidate texture and the + * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If + * there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we + * have to check if the candidate is a view (layer/mipmap) of the overlap or if the + * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct + * a new surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { @@ -651,7 +668,7 @@ private: // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails - // inmediatly recycle the texture + // immediately recycle the texture for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -720,12 +737,13 @@ private: } /** - * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache that's similar to it. If there are many textures + * Gets the starting address and parameters of a candidate surface and tries to find a + * matching surface within the cache that's similar to it. 
If there are many textures * or the texture found if entirely incompatible, it will fail. If no texture is found, the * blit will be unsuccessful. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; @@ -777,11 +795,14 @@ private: } /** - * `DeduceBestBlit` gets the a source and destination starting address and parameters, + * Gets a source and destination starting address and parameters, * and tries to deduce if they are supposed to be depth textures. If so, their * parameters are modified and fixed into so. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the parameters on the candidate surface. + * + * @param src_params The parameters of the candidate surface. + * @param dst_params The parameters of the destination surface. + * @param src_gpu_addr The starting address of the candidate surface. + * @param dst_gpu_addr The starting address of the destination surface. **/ void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { |