From a9ca39f8591532ba6d37f7a3e068d5eefe416464 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Feb 2022 07:52:04 +0100 Subject: NVDRV: Further improvements. --- src/video_core/engines/maxwell_3d.cpp | 18 +++++------------- src/video_core/engines/maxwell_dma.cpp | 18 +++++++++++++----- src/video_core/engines/puller.cpp | 18 ++++-------------- 3 files changed, 22 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 632052c53..3c6e44a25 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -453,18 +453,10 @@ void Maxwell3D::ProcessFirmwareCall4() { } void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; if (long_query) { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{payload, system.GPU().GetTicks()}; - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + memory_manager.Write(sequence_address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(sequence_address, payload); } else { memory_manager.Write(sequence_address, static_cast(payload)); } @@ -493,10 +485,10 @@ void Maxwell3D::ProcessQueryGet() { const GPUVAddr sequence_address{regs.query.QueryAddress()}; const u32 payload = regs.query.query_sequence; std::function operation([this, sequence_address, payload] { - LongQueryResult query_result{payload, system.GPU().GetTicks()}; - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + memory_manager.Write(sequence_address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(sequence_address, payload); }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->SyncOperation(std::move(operation)); } break; case Regs::QueryOperation::Acquire: diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a12a95ce2..bcffd1862 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -274,16 +274,24 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { void MaxwellDMA::ReleaseSemaphore() { const auto type = regs.launch_dma.semaphore_type; const GPUVAddr address = regs.semaphore.address; + const u32 payload = regs.semaphore.payload; switch (type) { case LaunchDMA::SemaphoreType::NONE: break; - case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: - memory_manager.Write(address, regs.semaphore.payload); + case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { + std::function operation( + [this, address, payload] { memory_manager.Write(address, payload); }); + rasterizer->SignalFence(std::move(operation)); break; - case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: - memory_manager.Write(address, static_cast(regs.semaphore.payload)); - memory_manager.Write(address + 8, system.GPU().GetTicks()); + } + case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { + std::function operation([this, address, payload] { + memory_manager.Write(address + sizeof(u64), system.GPU().GetTicks()); + memory_manager.Write(address, payload); + }); + rasterizer->SignalFence(std::move(operation)); break; + } default: ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast(type.Value())); } diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index dd9494efa..c3ed11c13 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -59,6 +59,7 @@ void Puller::ProcessFenceActionMethod() { case Puller::FenceOperation::Acquire: // UNIMPLEMENTED_MSG("Channel Scheduling pending."); // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); + rasterizer->ReleaseFences(); break; case Puller::FenceOperation::Increment: rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); @@ -73,19 +74,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { const auto op = static_cast(regs.semaphore_trigger & semaphoreOperationMask); if (op == GpuSemaphoreOperation::WriteLong) { - struct Block { - u32 sequence; - u32 zeros = 0; - u64 timestamp; - }; - const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; std::function operation([this, sequence_address, payload] { - Block block{}; - block.sequence = payload; - block.timestamp = gpu.GetTicks(); - memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); + memory_manager.Write(sequence_address + sizeof(u64), gpu.GetTicks()); + memory_manager.Write(sequence_address, payload); }); rasterizer->SignalFence(std::move(operation)); } else { @@ -98,7 +91,6 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_mode = false; if (word != regs.acquire_value) { rasterizer->ReleaseFences(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else if (op == GpuSemaphoreOperation::AcquireGequal) { @@ -106,13 +98,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { regs.acquire_mode = true; if (word < regs.acquire_value) { rasterizer->ReleaseFences(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else if (op == GpuSemaphoreOperation::AcquireMask) { if (word && regs.semaphore_sequence == 0) { rasterizer->ReleaseFences(); - std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } else { @@ -128,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() { std::function operation([this, sequence_address, payload] { memory_manager.Write(sequence_address, payload); }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->SyncOperation(std::move(operation)); } void Puller::ProcessSemaphoreAcquire() { -- cgit v1.2.3