diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 16 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 4 | ||||
-rw-r--r-- | src/video_core/gpu.h | 4 | ||||
-rw-r--r-- | src/video_core/gpu_thread.cpp | 62 | ||||
-rw-r--r-- | src/video_core/gpu_thread.h | 15 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/renderer_vulkan.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_swapchain.cpp | 8 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_swapchain.h | 2 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 9 |
10 files changed, 83 insertions, 44 deletions
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 59e586695..29bb31418 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -2,8 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> // for std::memcpy +#include <algorithm> // for std::copy #include <numeric> +#include "common/assert.h" #include "video_core/command_classes/codecs/vp9.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -362,7 +363,8 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following // order: last, golden, altref, current. It may be worthwhile to track the updates done here // to avoid buffering frame data needed for reference frame updating in the header composition. - std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); + std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4, + vp9_info.frame_offsets.begin()); return vp9_info; } @@ -821,11 +823,11 @@ const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters // Write headers and frame to buffer frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); - std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size()); - std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(), - compressed_header.size()); - std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(), - bitstream.data(), bitstream.size()); + std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin()); + std::copy(compressed_header.begin(), compressed_header.end(), + frame.begin() + uncompressed_header.size()); + std::copy(bitstream.begin(), bitstream.end(), + frame.begin() + uncompressed_header.size() + compressed_header.size()); // keep track of frame number current_frame_number++; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index c61f44619..009c6f574 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } -void GPU::WaitIdle() const { - gpu_thread.WaitIdle(); +void GPU::ShutDown() { + gpu_thread.ShutDown(); } void GPU::OnCommandListEnd() { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b2ee45496..ecab35d3b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -219,8 +219,8 @@ public: return *shader_notify; } - // Waits for the GPU to finish working - void WaitIdle() const; + // Stops the GPU execution and waits for the GPU to finish working + void ShutDown(); /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 99353f15f..7addfbc7b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context - while (state.queue.Empty()) - ; + state.queue.Wait(); // If emulation was stopped during disk shader loading, abort before trying to acquire context if (!state.is_running) { @@ -57,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { - return; + ASSERT(state.is_running == false); } else { UNREACHABLE(); } state.signaled_fence.store(next.fence); + if (next.block) { + // We have to lock the write_lock to ensure that the condition_variable wait not get a + // race between the check and the lock itself. + std::lock_guard lk(state.write_lock); + state.cv.notify_all(); + } } } @@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_) : system{system_}, is_async{is_async_} {} ThreadManager::~ThreadManager() { - if (!thread.joinable()) { - return; - } - - // Notify GPU thread that a shutdown is pending - PushCommand(EndProcessingCommand()); - thread.join(); + ShutDown(); } void ThreadManager::StartThread(VideoCore::RendererBase& renderer, @@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { case Settings::GPUAccuracy::Extreme: { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); - PushCommand(GPUTickCommand()); - while (fence > gpu.CurrentFlushRequestFence()) { - } + PushCommand(GPUTickCommand(), true); + ASSERT(fence <= gpu.CurrentFlushRequestFence()); break; } default: @@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { rasterizer->OnCPUWrite(addr, size); } -void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && - system.IsPoweredOn()) { +void ThreadManager::ShutDown() { + if (!state.is_running) { + return; } + + { + std::lock_guard lk(state.write_lock); + state.is_running = false; + state.cv.notify_all(); + } + + if (!thread.joinable()) { + return; + } + + // Notify GPU thread that a shutdown is pending + PushCommand(EndProcessingCommand()); + thread.join(); } void ThreadManager::OnCommandListEnd() { PushCommand(OnCommandListEndCommand()); } -u64 ThreadManager::PushCommand(CommandData&& command_data) { - const u64 fence{++state.last_fence}; - state.queue.Push(CommandDataContainer(std::move(command_data), fence)); - +u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { if (!is_async) { // In synchronous GPU mode, block the caller until the command has executed - WaitIdle(); + block = true; + } + + std::unique_lock lk(state.write_lock); + const u64 fence{++state.last_fence}; + state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); + + if (block) { + state.cv.wait(lk, [this, fence] { + return fence <= state.signaled_fence.load(std::memory_order_relaxed) || + !state.is_running; + }); } return fence; diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 18269e51c..11a648f38 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -90,21 +90,24 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) - : data{std::move(data_)}, fence{next_fence_} {} + explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) + : data{std::move(data_)}, fence{next_fence_}, block(block_) {} CommandData data; u64 fence{}; + bool block{}; }; /// Struct used to synchronize the GPU thread struct SynchState final { std::atomic_bool is_running{true}; - using CommandQueue = Common::MPSCQueue<CommandDataContainer>; + using CommandQueue = Common::SPSCQueue<CommandDataContainer>; + std::mutex write_lock; CommandQueue queue; u64 last_fence{}; std::atomic<u64> signaled_fence{}; + std::condition_variable cv; }; /// Class used to manage the GPU thread @@ -132,14 +135,14 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(VAddr addr, u64 size); - // Wait until the gpu thread is idle. - void WaitIdle() const; + // Stops the GPU execution and waits for the GPU to finish working + void ShutDown(); void OnCommandListEnd(); private: /// Pushes a command to be executed by the GPU thread - u64 PushCommand(CommandData&& command_data); + u64 PushCommand(CommandData&& command_data, bool block = false); Core::System& system; const bool is_async; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 1cc720ddd..14e5f36e2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -143,7 +143,10 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { scheduler.WaitWorker(); - swapchain.AcquireNextImage(); + while (!swapchain.AcquireNextImage()) { + swapchain.Create(layout.width, layout.height, is_srgb); + blit_screen.Recreate(); + } const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); scheduler.Flush(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 668633e7b..8cb65e588 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -176,7 +176,7 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset u32 stride) { if (device.IsExtExtendedDynamicStateSupported()) { scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { - const VkDeviceSize vk_offset = offset; + const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; const VkDeviceSize vk_stride = stride; cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 0b63bd6c8..dfd5c65ba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -82,11 +82,13 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -void VKSwapchain::AcquireNextImage() { - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index); +bool VKSwapchain::AcquireNextImage() { + const VkResult result = + device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), + *present_semaphores[frame_index], {}, &image_index); scheduler.Wait(resource_ticks[image_index]); + return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; } bool VKSwapchain::Present(VkSemaphore render_semaphore) { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index a728511e0..adc8d27cf 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,7 +28,7 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - void AcquireNextImage(); + bool AcquireNextImage(); /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be /// recreated. Takes responsability for the ownership of fence. diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 697cb16b9..230b8717b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -294,6 +294,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, bit8_storage); + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, + .pNext = nullptr, + .robustBufferAccess2 = true, + .robustImageAccess2 = true, + .nullDescriptor = true, + }; + SetNext(next, robustness2); + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT, .pNext = nullptr, |