diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 5 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 48 | ||||
-rw-r--r-- | src/video_core/gpu.h | 34 | ||||
-rw-r--r-- | src/video_core/gpu_asynch.cpp | 9 | ||||
-rw-r--r-- | src/video_core/gpu_asynch.h | 3 | ||||
-rw-r--r-- | src/video_core/gpu_synch.cpp | 2 | ||||
-rw-r--r-- | src/video_core/gpu_synch.h | 4 | ||||
-rw-r--r-- | src/video_core/gpu_thread.cpp | 27 | ||||
-rw-r--r-- | src/video_core/gpu_thread.h | 32 |
9 files changed, 105 insertions, 59 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74c46ec04..125c53360 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -525,8 +525,9 @@ void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); const u32 cache_flush = regs.sync_info.unknown.Value(); - LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, - cache_flush); + if (increment) { + system.GPU().IncrementSyncPoint(sync_point); + } } void Maxwell3D::DrawArrays() { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 21007d8b2..1622332a4 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -29,7 +29,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { UNREACHABLE(); } -GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { +GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) + : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); @@ -74,6 +75,51 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::IncrementSyncPoint(const u32 syncpoint_id) { + syncpoints[syncpoint_id]++; + std::lock_guard lock{sync_mutex}; + if (!syncpt_interrupts[syncpoint_id].empty()) { + u32 value = syncpoints[syncpoint_id].load(); + auto it = syncpt_interrupts[syncpoint_id].begin(); + while (it != syncpt_interrupts[syncpoint_id].end()) { + if (value >= *it) { + TriggerCpuInterrupt(syncpoint_id, *it); + it = syncpt_interrupts[syncpoint_id].erase(it); + continue; + } + it++; + } + } +} + +u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { + return syncpoints[syncpoint_id].load(); +} + +void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { + auto& interrupt = syncpt_interrupts[syncpoint_id]; + bool contains = std::any_of(interrupt.begin(), interrupt.end(), + [value](u32 in_value) { return in_value == value; }); + if (contains) { + return; + } + syncpt_interrupts[syncpoint_id].emplace_back(value); +} + +bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { + std::lock_guard lock{sync_mutex}; + auto& interrupt = syncpt_interrupts[syncpoint_id]; + const auto iter = + std::find_if(interrupt.begin(), interrupt.end(), + [value](u32 interrupt_value) { return value == interrupt_value; }); + + if (iter == interrupt.end()) { + return false; + } + interrupt.erase(iter); + return true; +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 0055e5326..87c96f46b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -5,8 +5,12 @@ #pragma once #include <array> +#include <atomic> +#include <list> #include <memory> +#include <mutex> #include "common/common_types.h" +#include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/dma_pusher.h" @@ -127,7 +131,7 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); virtual ~GPU(); @@ -170,6 +174,22 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + void IncrementSyncPoint(u32 syncpoint_id); + + u32 GetSyncpointValue(u32 syncpoint_id) const; + + void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); + + bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + + std::unique_lock<std::mutex> LockSync() { + return std::unique_lock{sync_mutex}; + } + + bool IsAsync() const { + return is_async; + } + /// Returns a const reference to the GPU DMA pusher. const Tegra::DmaPusher& DmaPusher() const; @@ -239,6 +259,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; +protected: + virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; + private: void ProcessBindMethod(const MethodCall& method_call); void ProcessSemaphoreTriggerMethod(); @@ -257,6 +280,7 @@ private: protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; VideoCore::RendererBase& renderer; + Core::System& system; private: std::unique_ptr<Tegra::MemoryManager> memory_manager; @@ -273,6 +297,14 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; + + std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; + + std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; + + std::mutex sync_mutex; + + const bool is_async; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index d4e2553a9..ea67be831 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "core/core.h" +#include "core/hardware_interrupt_manager.h" #include "video_core/gpu_asynch.h" #include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" @@ -9,7 +11,7 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer), gpu_thread{system} {} + : GPU(system, renderer, true), gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; @@ -38,4 +40,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { gpu_thread.FlushAndInvalidateRegion(addr, size); } +void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { + auto& interrupt_manager = system.InterruptManager(); + interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 30be74cba..36377d677 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -27,6 +27,9 @@ public: void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; +protected: + void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; + private: GPUThread::ThreadManager gpu_thread; }; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 45e43b1dc..d4ead9c47 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -8,7 +8,7 @@ namespace VideoCommon { GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer) {} + : GPU(system, renderer, false) {} GPUSynch::~GPUSynch() = default; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 3031fcf72..07bcc47f1 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -25,6 +25,10 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + +protected: + void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, + [[maybe_unused]] u32 value) const override {} }; } // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3f0939ec9..b441e92b0 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p MicroProfileOnThreadCreate("GpuThread"); // Wait for first GPU command before acquiring the window context - state.WaitForCommands(); + while (state.queue.Empty()) + ; // If emulation was stopped during disk shader loading, abort before trying to acquire context if (!state.is_running) { @@ -32,7 +33,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p CommandDataContainer next; while (state.is_running) { - state.WaitForCommands(); while (!state.queue.Empty()) { state.queue.Pop(next); if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { @@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p } else { UNREACHABLE(); } - state.signaled_fence = next.fence; - state.TrySynchronize(); + state.signaled_fence.store(next.fence); } } } @@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { } void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { - if (state.queue.Empty()) { - // It's quicker to invalidate a single region on the CPU if the queue is already empty - system.Renderer().Rasterizer().InvalidateRegion(addr, size); - } else { - PushCommand(InvalidateRegionCommand(addr, size)); - } + system.Renderer().Rasterizer().InvalidateRegion(addr, size); } void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { @@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); - state.SignalCommands(); return fence; } MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); void SynchState::WaitForSynchronization(u64 fence) { - if (signaled_fence >= fence) { - return; - } - - // Wait for the GPU to be idle (all commands to be executed) - { - MICROPROFILE_SCOPE(GPU_wait); - std::unique_lock lock{synchronization_mutex}; - synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); - } + while (signaled_fence.load() < fence) + ; } } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 05a168a72..1d9d0c39e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -88,41 +88,9 @@ struct CommandDataContainer { /// Struct used to synchronize the GPU thread struct SynchState final { std::atomic_bool is_running{true}; - std::atomic_int queued_frame_count{}; - std::mutex synchronization_mutex; - std::mutex commands_mutex; - std::condition_variable commands_condition; - std::condition_variable synchronization_condition; - - /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU - /// synchronized. This is entirely empirical. - bool IsSynchronized() const { - constexpr std::size_t max_queue_gap{5}; - return queue.Size() <= max_queue_gap; - } - - void TrySynchronize() { - if (IsSynchronized()) { - std::lock_guard lock{synchronization_mutex}; - synchronization_condition.notify_one(); - } - } void WaitForSynchronization(u64 fence); - void SignalCommands() { - if (queue.Empty()) { - return; - } - - commands_condition.notify_one(); - } - - void WaitForCommands() { - std::unique_lock lock{commands_mutex}; - commands_condition.wait(lock, [this] { return !queue.Empty(); }); - } - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; |