From 877cd60b00a3f827062fdaff93183b52174ec134 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 15 Sep 2021 20:32:54 -0400
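Subject: gpu: Use std::jthread for async gpu thread

Run the GPU thread as a std::jthread and drive its loop with the
thread's std::stop_token instead of the SynchState::is_running flag
and the EndProcessingCommand sentinel, both of which are removed.

GPU::ShutDown() and ThreadManager::ShutDown() are dropped, and
ThreadManager's destructor is defaulted, leaving the stop request and
join to std::jthread's destructor. System::Impl now resets gpu_core
before the services instead of calling ShutDown() and resetting it
after app_loader.

RunThread no longer waits for the first queued command before
acquiring the graphics context. SynchState::cv becomes a
std::condition_variable_any so the blocking wait in PushCommand can
take the stop token.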
---
 src/core/core.cpp             |  6 +----
 src/video_core/gpu.cpp        |  8 ------
 src/video_core/gpu.h          |  3 ---
 src/video_core/gpu_thread.cpp | 57 ++++++++++---------------------------------
 src/video_core/gpu_thread.h   | 13 +++-------
 5 files changed, 18 insertions(+), 69 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index b13350f6e..54ebed2c1 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -305,10 +305,7 @@ struct System::Impl {
         is_powered_on = false;
         exit_lock = false;
 
-        if (gpu_core) {
-            gpu_core->ShutDown();
-        }
-
+        gpu_core.reset();
         services.reset();
         service_manager.reset();
         cheat_engine.reset();
@@ -317,7 +314,6 @@ struct System::Impl {
         time_manager.Shutdown();
         core_timing.Shutdown();
         app_loader.reset();
-        gpu_core.reset();
         perf_stats.reset();
         kernel.Shutdown();
         memory.Reset();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ff024f530..2ae3639b5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -531,14 +531,6 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
 }
 
-void GPU::ShutDown() {
-    // Signal that threads should no longer block on syncpoint fences
-    shutting_down.store(true, std::memory_order_relaxed);
-    sync_cv.notify_all();
-
-    gpu_thread.ShutDown();
-}
-
 void GPU::OnCommandListEnd() {
     if (is_async) {
         // This command only applies to asynchronous GPU mode
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a8e98e51b..e6a02a71b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -219,9 +219,6 @@ public:
         return *shader_notify;
     }
 
-    // Stops the GPU execution and waits for the GPU to finish working
-    void ShutDown();
-
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value);
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 46f642b19..9547f277a 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -17,9 +17,9 @@
 namespace VideoCommon::GPUThread {
 
 /// Runs the GPU thread
-static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
-                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
-                      SynchState& state) {
+static void RunThread(std::stop_token stop_token, Core::System& system,
+                      VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
+                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
     std::string name = "yuzu:GPU";
     MicroProfileOnThreadCreate(name.c_str());
     SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -28,20 +28,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
     Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     system.RegisterHostThread();
 
-    // Wait for first GPU command before acquiring the window context
-    state.queue.Wait();
-
-    // If emulation was stopped during disk shader loading, abort before trying to acquire context
-    if (!state.is_running) {
-        return;
-    }
-
     auto current_context = context.Acquire();
     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
 
-    CommandDataContainer next;
-    while (state.is_running) {
-        next = state.queue.PopWait();
+    while (!stop_token.stop_requested()) {
+        CommandDataContainer next = state.queue.PopWait(stop_token);
+        if (stop_token.stop_requested()) {
+            break;
+        }
         if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
             dma_pusher.Push(std::move(submit_list->entries));
             dma_pusher.DispatchCalls();
@@ -55,8 +49,6 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
             rasterizer->FlushRegion(flush->addr, flush->size);
         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
-        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
-            ASSERT(state.is_running == false);
         } else {
             UNREACHABLE();
         }
@@ -73,16 +65,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
 ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
     : system{system_}, is_async{is_async_} {}
 
-ThreadManager::~ThreadManager() {
-    ShutDown();
-}
+ThreadManager::~ThreadManager() = default;
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
                                 Tegra::DmaPusher& dma_pusher) {
     rasterizer = renderer.ReadRasterizer();
-    thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                         std::ref(dma_pusher), std::ref(state));
+    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
+                          std::ref(dma_pusher), std::ref(state));
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -117,26 +107,6 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     rasterizer->OnCPUWrite(addr, size);
 }
 
-void ThreadManager::ShutDown() {
-    if (!state.is_running) {
-        return;
-    }
-
-    {
-        std::lock_guard lk(state.write_lock);
-        state.is_running = false;
-        state.cv.notify_all();
-    }
-
-    if (!thread.joinable()) {
-        return;
-    }
-
-    // Notify GPU thread that a shutdown is pending
-    PushCommand(EndProcessingCommand());
-    thread.join();
-}
-
 void ThreadManager::OnCommandListEnd() {
     PushCommand(OnCommandListEndCommand());
 }
@@ -152,9 +122,8 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
     state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
 
     if (block) {
-        state.cv.wait(lk, [this, fence] {
-            return fence <= state.signaled_fence.load(std::memory_order_relaxed) ||
-                   !state.is_running;
+        state.cv.wait(lk, thread.get_stop_token(), [this, fence] {
+            return fence <= state.signaled_fence.load(std::memory_order_relaxed);
         });
     }
 
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 11a648f38..91bada925 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -33,9 +33,6 @@ class RendererBase;
 
 namespace VideoCommon::GPUThread {
 
-/// Command to signal to the GPU thread that processing has ended
-struct EndProcessingCommand final {};
-
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
     explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
@@ -83,7 +80,7 @@ struct OnCommandListEndCommand final {};
 struct GPUTickCommand final {};
 
 using CommandData =
-    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+    std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
                  GPUTickCommand>;
 
@@ -100,14 +97,12 @@ struct CommandDataContainer {
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic_bool is_running{true};
-
-    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer, true>;
     std::mutex write_lock;
     CommandQueue queue;
     u64 last_fence{};
     std::atomic<u64> signaled_fence{};
-    std::condition_variable cv;
+    std::condition_variable_any cv;
 };
 
 /// Class used to manage the GPU thread
@@ -149,7 +144,7 @@ private:
     VideoCore::RasterizerInterface* rasterizer = nullptr;
 
     SynchState state;
-    std::thread thread;
+    std::jthread thread;
 };
 
 } // namespace VideoCommon::GPUThread
-- 
cgit v1.2.3