diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 16 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 139 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_master_semaphore.h | 48 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.cpp | 36 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 6 |
9 files changed, 189 insertions, 76 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index a126c359c..02e161270 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -77,6 +77,14 @@ void Fermi2D::Blit() { const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && src.format != regs.dst.format; + + auto srcX = args.src_x0; + auto srcY = args.src_y0; + if (args.sample_mode.origin == Origin::Corner) { + srcX -= (args.du_dx >> 33) << 32; + srcY -= (args.dv_dy >> 33) << 32; + } + Config config{ .operation = regs.operation, .filter = args.sample_mode.filter, @@ -86,10 +94,10 @@ void Fermi2D::Blit() { .dst_y0 = args.dst_y0, .dst_x1 = args.dst_x0 + args.dst_width, .dst_y1 = args.dst_y0 + args.dst_height, - .src_x0 = static_cast<s32>(args.src_x0 >> 32), - .src_y0 = static_cast<s32>(args.src_y0 >> 32), - .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), - .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), + .src_x0 = static_cast<s32>(srcX >> 32), + .src_y0 = static_cast<s32>(srcY >> 32), + .src_x1 = static_cast<s32>((srcX + args.du_dx * args.dst_width) >> 32), + .src_y1 = static_cast<s32>((srcY + args.dv_dy * args.dst_height) >> 32), }; const auto need_align_to_pitch = diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 614d61db4..0932fadc2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,7 @@ #include <cstring> #include <optional> #include "common/assert.h" +#include "common/bit_util.h" #include "common/scope_exit.h" #include "common/settings.h" #include "core/core.h" @@ -259,12 +260,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - static constexpr std::array<size_t, 4> max_sizes = { - std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), - std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; + static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(), + std::numeric_limits<u16>::max(), + std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); + const size_t log2_byte_size = Common::Log2Ceil64(byte_size); return std::min<size_t>( - memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / + memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / byte_size, static_cast<size_t>(end_address - start_address)); } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 3ff8cad83..cc0b95f1a 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -176,6 +176,10 @@ public: return vendor_name == "ATI Technologies Inc."; } + bool IsIntel() const { + return vendor_name == "Intel"; + } + bool CanReportMemoryUsage() const { return can_report_memory; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 479bb8ba3..6ecda2984 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -218,6 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, .need_fastmath_off = device.NeedsFastmathOff(), + .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 8aa07ef9d..47c74e4d8 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -10,7 +10,14 @@ namespace Vulkan { -MasterSemaphore::MasterSemaphore(const Device& device) { +MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { + if (!device.HasTimelineSemaphore()) { + static constexpr VkFenceCreateInfo fence_ci{ + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; + fence = device.GetLogical().CreateFence(fence_ci); + return; + } + static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, .pNext = nullptr, @@ -42,4 +49,134 @@ MasterSemaphore::MasterSemaphore(const Device& device) { MasterSemaphore::~MasterSemaphore() = default; +void MasterSemaphore::Refresh() { + if (!semaphore) { + // If we don't support timeline semaphores, there's nothing to refresh + return; + } + + u64 this_tick{}; + u64 counter{}; + do { + this_tick = gpu_tick.load(std::memory_order_acquire); + counter = semaphore.GetCounter(); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); +} + +void MasterSemaphore::Wait(u64 tick) { + if (!semaphore) { + // If we don't support timeline semaphores, use an atomic wait + while (true) { + u64 current_value = gpu_tick.load(std::memory_order_relaxed); + if (current_value >= tick) { + return; + } + gpu_tick.wait(current_value); + } + + return; + } + + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + + // Update the GPU tick and try again + Refresh(); + + if (IsFree(tick)) { + return; + } + + // If none of the above is hit, fallback to a regular wait + while (!semaphore.Wait(tick)) { + } + + Refresh(); +} + +VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + if (semaphore) { + return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + } else { + return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + } +} + +static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, +}; + +VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, + VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + const VkSemaphore timeline_semaphore = *semaphore; + + const u32 num_signal_semaphores = signal_semaphore ? 2 : 1; + const std::array signal_values{host_tick, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2 : 1; + const std::array wait_values{host_tick - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + + const VkTimelineSemaphoreSubmitInfo timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + + return device.GetGraphicsQueue().Submit(submit_info); +} + +VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick) { + const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; + const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; + + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = &signal_semaphore, + }; + + auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); + + if (result == VK_SUCCESS) { + fence.Wait(); + fence.Reset(); + gpu_tick.store(host_tick); + gpu_tick.notify_all(); + } + + return result; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 689f02ea5..f2f61f781 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -4,6 +4,8 @@ #pragma once #include <atomic> +#include <condition_variable> +#include <mutex> #include <thread> #include "common/common_types.h" @@ -29,11 +31,6 @@ public: return gpu_tick.load(std::memory_order_acquire); } - /// Returns the timeline semaphore handle. - [[nodiscard]] VkSemaphore Handle() const noexcept { - return *semaphore; - } - /// Returns true when a tick has been hit by the GPU. [[nodiscard]] bool IsFree(u64 tick) const noexcept { return KnownGpuTick() >= tick; @@ -45,37 +42,24 @@ public: } /// Refresh the known GPU tick - void Refresh() { - u64 this_tick{}; - u64 counter{}; - do { - this_tick = gpu_tick.load(std::memory_order_acquire); - counter = semaphore.GetCounter(); - if (counter < this_tick) { - return; - } - } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, - std::memory_order_relaxed)); - } + void Refresh(); /// Waits for a tick to be hit on the GPU - void Wait(u64 tick) { - // No need to wait if the GPU is ahead of the tick - if (IsFree(tick)) { - return; - } - // Update the GPU tick and try again - Refresh(); - if (IsFree(tick)) { - return; - } - // If none of the above is hit, fallback to a regular wait - while (!semaphore.Wait(tick)) { - } - Refresh(); - } + void Wait(u64 tick); + + /// Submits the device graphics queue, updating the tick as necessary + VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); + +private: + VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, + VkSemaphore wait_semaphore, u64 host_tick); private: + const Device& device; ///< Device. + vk::Fence fence; ///< Fence. vk::Semaphore semaphore; ///< Timeline semaphore. std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. std::atomic<u64> current_tick{1}; ///< Current logical tick. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0684cceed..985cc3203 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -329,6 +329,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .lower_left_origin_mode = false, .need_declared_frag_colors = false, + .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || + driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || + driver_id == VK_DRIVER_ID_MESA_RADV || + driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b264e6ada..057e16967 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -212,45 +212,13 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s const u64 signal_value = master_semaphore->NextTick(); Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - - const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; - - const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; - const std::array wait_values{signal_value - 1, u64(1)}; - const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; - static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - }; - - const VkTimelineSemaphoreSubmitInfo timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; if (on_submit) { on_submit(); } - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + switch (const VkResult result = master_semaphore->SubmitQueue( + cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 41b5da18a..7d5018151 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -145,7 +145,6 @@ FEATURE_NAME(robustness2, robustImageAccess2) \ FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ - FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ FEATURE_NAME(variable_pointer, variablePointers) \ FEATURE_NAME(variable_pointer, variablePointersStorageBuffer) @@ -158,6 +157,7 @@ FEATURE_NAME(provoking_vertex, provokingVertexLast) \ FEATURE_NAME(shader_float16_int8, shaderFloat16) \ FEATURE_NAME(shader_float16_int8, shaderInt8) \ + FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ FEATURE_NAME(transform_feedback, transformFeedback) \ FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \ FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState) @@ -493,6 +493,10 @@ public: return extensions.shader_atomic_int64; } + bool HasTimelineSemaphore() const { + return features.timeline_semaphore.timelineSemaphore; + } + /// Returns the minimum supported version of SPIR-V. u32 SupportedSpirvVersion() const { if (instance_version >= VK_API_VERSION_1_3) { |