Diffstat (limited to '')
-rw-r--r--  src/video_core/engines/fermi_2d.cpp                     |  16
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp                   |  10
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h              |   4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp      |   1
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.cpp  | 139
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h    |  48
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp    |   5
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp         |  36
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h            |   6
9 files changed, 189 insertions(+), 76 deletions(-)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a126c359c..02e161270 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -77,6 +77,14 @@ void Fermi2D::Blit() {
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
src.format != regs.dst.format;
+
+ auto srcX = args.src_x0;
+ auto srcY = args.src_y0;
+ if (args.sample_mode.origin == Origin::Corner) {
+ srcX -= (args.du_dx >> 33) << 32;
+ srcY -= (args.dv_dy >> 33) << 32;
+ }
+
Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,
@@ -86,10 +94,10 @@ void Fermi2D::Blit() {
.dst_y0 = args.dst_y0,
.dst_x1 = args.dst_x0 + args.dst_width,
.dst_y1 = args.dst_y0 + args.dst_height,
- .src_x0 = static_cast<s32>(args.src_x0 >> 32),
- .src_y0 = static_cast<s32>(args.src_y0 >> 32),
- .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
- .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
+ .src_x0 = static_cast<s32>(srcX >> 32),
+ .src_y0 = static_cast<s32>(srcY >> 32),
+ .src_x1 = static_cast<s32>((srcX + args.du_dx * args.dst_width) >> 32),
+ .src_y1 = static_cast<s32>((srcY + args.dv_dy * args.dst_height) >> 32),
};
const auto need_align_to_pitch =
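
Note: src_x0/src_y0 and du_dx/dv_dy are 32.32 fixed-point values, so "value >> 32" extracts the whole-pixel part. For Origin::Corner the patch shifts the source start back by half a step, truncated to whole pixels, which is what "(step >> 33) << 32" computes. A minimal standalone sketch of that arithmetic (the type alias and helper names below are illustrative, not from the patch):

    #include <cstdint>

    // 32.32 fixed point: the high 32 bits are the integer pixel, the low 32
    // bits the fractional part.
    using Fixed32_32 = std::int64_t;

    // Half of a step, truncated to whole pixels: divide by two (>> 33 relative
    // to the integer part) and clear the fraction again (<< 32), matching the
    // Corner-origin adjustment in Fermi2D::Blit.
    constexpr Fixed32_32 HalfStepWholePixels(Fixed32_32 step) {
        return (step >> 33) << 32;
    }

    // Convert a 32.32 coordinate to an integer pixel, as the .src_x0/.src_y0
    // fields do with static_cast<s32>(value >> 32).
    constexpr std::int32_t ToPixel(Fixed32_32 value) {
        return static_cast<std::int32_t>(value >> 32);
    }

    static_assert(ToPixel(Fixed32_32{3} << 32) == 3);
    static_assert(HalfStepWholePixels(Fixed32_32{4} << 32) == (Fixed32_32{2} << 32));
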
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 614d61db4..0932fadc2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
+#include "common/bit_util.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
@@ -259,12 +260,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
- static constexpr std::array<size_t, 4> max_sizes = {
- std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
- std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+ static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(),
+ std::numeric_limits<u16>::max(),
+ std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
+ const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
return std::min<size_t>(
- memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
+ memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
static_cast<size_t>(end_address - start_address));
}
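
Note: the old lookup indexed max_sizes directly with the index-format size in bytes (1, 2 or 4), which selected the wrong entry for 1- and 2-byte indices and indexed past the end of the array for 4-byte ones. The fix maps the byte size to its log2 before indexing. A small standalone sketch of the corrected mapping, using std::countr_zero in place of Common::Log2Ceil64 (equivalent for power-of-two sizes):

    #include <array>
    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <limits>

    // Maps an index-format byte size (1, 2 or 4) to the largest index value
    // that format can hold: 1 -> u8 max, 2 -> u16 max, 4 -> u32 max.
    constexpr std::size_t MaxIndexValue(std::size_t byte_size) {
        constexpr std::array<std::size_t, 3> max_sizes{
            std::numeric_limits<std::uint8_t>::max(),
            std::numeric_limits<std::uint16_t>::max(),
            std::numeric_limits<std::uint32_t>::max(),
        };
        // For powers of two, log2 equals the number of trailing zero bits.
        return max_sizes[static_cast<std::size_t>(std::countr_zero(byte_size))];
    }

    static_assert(MaxIndexValue(1) == 0xFF);
    static_assert(MaxIndexValue(2) == 0xFFFF);
    static_assert(MaxIndexValue(4) == 0xFFFF'FFFF);
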
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 3ff8cad83..cc0b95f1a 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -176,6 +176,10 @@ public:
return vendor_name == "ATI Technologies Inc.";
}
+ bool IsIntel() const {
+ return vendor_name == "Intel";
+ }
+
bool CanReportMemoryUsage() const {
return can_report_memory;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 479bb8ba3..6ecda2984 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -218,6 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,
.need_fastmath_off = device.NeedsFastmathOff(),
+ .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(),
.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 8aa07ef9d..47c74e4d8 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -10,7 +10,14 @@
namespace Vulkan {
-MasterSemaphore::MasterSemaphore(const Device& device) {
+MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
+ if (!device.HasTimelineSemaphore()) {
+ static constexpr VkFenceCreateInfo fence_ci{
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
+ fence = device.GetLogical().CreateFence(fence_ci);
+ return;
+ }
+
static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.pNext = nullptr,
@@ -42,4 +49,134 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
MasterSemaphore::~MasterSemaphore() = default;
+void MasterSemaphore::Refresh() {
+ if (!semaphore) {
+ // If we don't support timeline semaphores, there's nothing to refresh
+ return;
+ }
+
+ u64 this_tick{};
+ u64 counter{};
+ do {
+ this_tick = gpu_tick.load(std::memory_order_acquire);
+ counter = semaphore.GetCounter();
+ if (counter < this_tick) {
+ return;
+ }
+ } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
+ std::memory_order_relaxed));
+}
+
+void MasterSemaphore::Wait(u64 tick) {
+ if (!semaphore) {
+ // If we don't support timeline semaphores, use an atomic wait
+ while (true) {
+ u64 current_value = gpu_tick.load(std::memory_order_relaxed);
+ if (current_value >= tick) {
+ return;
+ }
+ gpu_tick.wait(current_value);
+ }
+
+ return;
+ }
+
+ // No need to wait if the GPU is ahead of the tick
+ if (IsFree(tick)) {
+ return;
+ }
+
+ // Update the GPU tick and try again
+ Refresh();
+
+ if (IsFree(tick)) {
+ return;
+ }
+
+ // If none of the above is hit, fallback to a regular wait
+ while (!semaphore.Wait(tick)) {
+ }
+
+ Refresh();
+}
+
+VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ if (semaphore) {
+ return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ } else {
+ return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+ }
+}
+
+static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+};
+
+VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
+ VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ const VkSemaphore timeline_semaphore = *semaphore;
+
+ const u32 num_signal_semaphores = signal_semaphore ? 2 : 1;
+ const std::array signal_values{host_tick, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+
+ const u32 num_wait_semaphores = wait_semaphore ? 2 : 1;
+ const std::array wait_values{host_tick - 1, u64(1)};
+ const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+
+ const VkTimelineSemaphoreSubmitInfo timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = num_wait_semaphores,
+ .pWaitSemaphoreValues = wait_values.data(),
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = num_wait_semaphores,
+ .pWaitSemaphores = wait_semaphores.data(),
+ .pWaitDstStageMask = wait_stage_masks.data(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
+ };
+
+ return device.GetGraphicsQueue().Submit(submit_info);
+}
+
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick) {
+ const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
+ const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
+
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = nullptr,
+ .waitSemaphoreCount = num_wait_semaphores,
+ .pWaitSemaphores = &wait_semaphore,
+ .pWaitDstStageMask = wait_stage_masks.data(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = &signal_semaphore,
+ };
+
+ auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
+
+ if (result == VK_SUCCESS) {
+ fence.Wait();
+ fence.Reset();
+ gpu_tick.store(host_tick);
+ gpu_tick.notify_all();
+ }
+
+ return result;
+}
+
} // namespace Vulkan
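
Note: when the device lacks timeline semaphores, MasterSemaphore now falls back to a plain VkFence plus a C++20 atomic counter: SubmitQueueFence waits for the fence on the submitting thread, then publishes the new tick and notifies, while Wait() blocks on the atomic until the target tick is reached. A minimal sketch of that wait/notify pattern in isolation (no Vulkan involved; the free functions are illustrative stand-ins for the member functions in the patch):

    #include <atomic>
    #include <cstdint>
    #include <thread>

    // Fence-fallback tick tracking: the submitting thread publishes the
    // completed tick after waiting on the fence, and waiters block on the
    // atomic until the tick they need has been reached.
    std::atomic<std::uint64_t> gpu_tick{0};

    void SignalTick(std::uint64_t host_tick) {
        // ...in the real code this runs after fence.Wait() / fence.Reset()...
        gpu_tick.store(host_tick, std::memory_order_release);
        gpu_tick.notify_all();
    }

    void WaitTick(std::uint64_t tick) {
        std::uint64_t current = gpu_tick.load(std::memory_order_acquire);
        while (current < tick) {
            gpu_tick.wait(current); // blocks until the stored value changes
            current = gpu_tick.load(std::memory_order_acquire);
        }
    }

    int main() {
        std::thread waiter([] { WaitTick(1); });
        SignalTick(1);
        waiter.join();
    }

When a timeline semaphore is available, the class instead refreshes gpu_tick from the semaphore's counter (see Refresh above) and waits on the semaphore itself.
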
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 689f02ea5..f2f61f781 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -4,6 +4,8 @@
#pragma once
#include <atomic>
+#include <condition_variable>
+#include <mutex>
#include <thread>
#include "common/common_types.h"
@@ -29,11 +31,6 @@ public:
return gpu_tick.load(std::memory_order_acquire);
}
- /// Returns the timeline semaphore handle.
- [[nodiscard]] VkSemaphore Handle() const noexcept {
- return *semaphore;
- }
-
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
@@ -45,37 +42,24 @@ public:
}
/// Refresh the known GPU tick
- void Refresh() {
- u64 this_tick{};
- u64 counter{};
- do {
- this_tick = gpu_tick.load(std::memory_order_acquire);
- counter = semaphore.GetCounter();
- if (counter < this_tick) {
- return;
- }
- } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
- std::memory_order_relaxed));
- }
+ void Refresh();
/// Waits for a tick to be hit on the GPU
- void Wait(u64 tick) {
- // No need to wait if the GPU is ahead of the tick
- if (IsFree(tick)) {
- return;
- }
- // Update the GPU tick and try again
- Refresh();
- if (IsFree(tick)) {
- return;
- }
- // If none of the above is hit, fallback to a regular wait
- while (!semaphore.Wait(tick)) {
- }
- Refresh();
- }
+ void Wait(u64 tick);
+
+ /// Submits the device graphics queue, updating the tick as necessary
+ VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
+
+private:
+ VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
+ VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+ VkSemaphore wait_semaphore, u64 host_tick);
private:
+ const Device& device; ///< Device.
+ vk::Fence fence; ///< Fence.
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0684cceed..985cc3203 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -329,6 +329,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.lower_left_origin_mode = false,
.need_declared_frag_colors = false,
+ .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+ driver_id == VK_DRIVER_ID_MESA_RADV ||
+ driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
+ driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA,
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index b264e6ada..057e16967 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -212,45 +212,13 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
- const VkSemaphore timeline_semaphore = master_semaphore->Handle();
-
- const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
- const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
-
- const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
- const std::array wait_values{signal_value - 1, u64(1)};
- const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
- static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- };
-
- const VkTimelineSemaphoreSubmitInfo timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pNext = nullptr,
- .waitSemaphoreValueCount = num_wait_semaphores,
- .pWaitSemaphoreValues = wait_values.data(),
- .signalSemaphoreValueCount = num_signal_semaphores,
- .pSignalSemaphoreValues = signal_values.data(),
- };
- const VkSubmitInfo submit_info{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &timeline_si,
- .waitSemaphoreCount = num_wait_semaphores,
- .pWaitSemaphores = wait_semaphores.data(),
- .pWaitDstStageMask = wait_stage_masks.data(),
- .commandBufferCount = 1,
- .pCommandBuffers = cmdbuf.address(),
- .signalSemaphoreCount = num_signal_semaphores,
- .pSignalSemaphores = signal_semaphores.data(),
- };
if (on_submit) {
on_submit();
}
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+ switch (const VkResult result = master_semaphore->SubmitQueue(
+ cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 41b5da18a..7d5018151 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -145,7 +145,6 @@
FEATURE_NAME(robustness2, robustImageAccess2) \
FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
- FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
FEATURE_NAME(variable_pointer, variablePointers) \
FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)
@@ -158,6 +157,7 @@
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
FEATURE_NAME(shader_float16_int8, shaderFloat16) \
FEATURE_NAME(shader_float16_int8, shaderInt8) \
+ FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
FEATURE_NAME(transform_feedback, transformFeedback) \
FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \
FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)
@@ -493,6 +493,10 @@ public:
return extensions.shader_atomic_int64;
}
+ bool HasTimelineSemaphore() const {
+ return features.timeline_semaphore.timelineSemaphore;
+ }
+
/// Returns the minimum supported version of SPIR-V.
u32 SupportedSpirvVersion() const {
if (instance_version >= VK_API_VERSION_1_3) {
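
Note: timelineSemaphore moves from the mandatory feature list to the optional one, and HasTimelineSemaphore() lets MasterSemaphore pick between the timeline path and the fence fallback. Outside yuzu's wrappers, the same capability can be queried through the standard Vulkan feature chain; a sketch using the plain C API (assumes Vulkan 1.2 or VK_KHR_timeline_semaphore is available):

    #include <vulkan/vulkan.h>

    // Query timeline-semaphore support for a physical device.
    bool QueryTimelineSemaphoreSupport(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features{};
        timeline_features.sType =
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;

        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &timeline_features;

        vkGetPhysicalDeviceFeatures2(physical_device, &features2);
        return timeline_features.timelineSemaphore == VK_TRUE;
    }
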