Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 10
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 92
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h | 12
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 12
-rw-r--r--  src/video_core/gpu.cpp | 1
-rw-r--r--  src/video_core/gpu_thread.cpp | 2
-rw-r--r--  src/video_core/renderer_base.cpp | 1
-rw-r--r--  src/video_core/renderer_base.h | 5
-rw-r--r--  src/video_core/renderer_null/renderer_null.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 76
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_context.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp | 150
-rw-r--r--  src/video_core/renderer_opengl/gl_staging_buffer_pool.h (renamed from src/video_core/renderer_opengl/gl_stream_buffer.h) | 44
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.cpp | 63
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 87
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 47
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp | 9
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.h | 10
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 64
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_present_manager.cpp | 52
-rw-r--r--  src/video_core/renderer_vulkan/vk_present_manager.h | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 21
-rw-r--r--  src/video_core/renderer_vulkan/vk_turbo_mode.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 2
-rw-r--r--  src/video_core/texture_cache/image_info.cpp | 20
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 12
-rw-r--r--  src/video_core/vulkan_common/vulkan_debug_callback.cpp | 28
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 105
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h | 32
-rw-r--r--  src/video_core/vulkan_common/vulkan_library.cpp | 18
-rw-r--r--  src/video_core/vulkan_common/vulkan_library.h | 6
47 files changed, 861 insertions(+), 301 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 308d013d6..bf6439530 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -133,8 +133,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp
renderer_opengl/gl_state_tracker.h
- renderer_opengl/gl_stream_buffer.cpp
- renderer_opengl/gl_stream_buffer.h
+ renderer_opengl/gl_staging_buffer_pool.cpp
+ renderer_opengl/gl_staging_buffer_pool.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp
@@ -281,7 +281,7 @@ create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
-if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
+if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID))
add_dependencies(video_core ffmpeg-build)
endif()
@@ -345,3 +345,7 @@ endif()
if (YUZU_ENABLE_LTO)
set_property(TARGET video_core PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
+
+if (ANDROID AND ARCHITECTURE_arm64)
+ target_link_libraries(video_core PRIVATE adrenotools)
+endif()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f1ad5f7cb..251a4a880 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
if (committed_ranges.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
committed_ranges.clear();
if (downloads.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
async_buffers.emplace_back(std::optional<Async_Buffer>{});
}
return;
@@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = channel_state->index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size);
std::array<BufferCopy, 1> copies{
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@@ -717,20 +715,38 @@ void BufferCache<P>::BindHostIndexBuffer() {
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
+ HostBindings host_bindings;
+ bool any_valid{false};
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
- const Binding& binding = channel_state->vertex_buffers[index];
- Buffer& buffer = slot_buffers[binding.buffer_id];
- TouchBuffer(buffer, binding.buffer_id);
- SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
- flags[Dirty::VertexBuffer0 + index] = false;
+ host_bindings.min_index = std::min(host_bindings.min_index, index);
+ host_bindings.max_index = std::max(host_bindings.max_index, index);
+ any_valid = true;
+ }
- const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
- const u32 offset = buffer.Offset(binding.cpu_addr);
- runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
+ if (any_valid) {
+ host_bindings.max_index++;
+ for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) {
+ flags[Dirty::VertexBuffer0 + index] = false;
+
+ const Binding& binding = channel_state->vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+
+ TouchBuffer(buffer, binding.buffer_id);
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+
+ const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+
+ host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer));
+ host_bindings.offsets.push_back(offset);
+ host_bindings.sizes.push_back(binding.size);
+ host_bindings.strides.push_back(stride);
+ }
+ runtime.BindVertexBuffers(host_bindings);
}
}
@@ -884,15 +900,25 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
if (maxwell3d->regs.transform_feedback_enabled == 0) {
return;
}
+ HostBindings host_bindings;
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = channel_state->transform_feedback_buffers[index];
+ if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 &&
+ maxwell3d->regs.transform_feedback.controls[index].stride == 0) {
+ break;
+ }
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
- runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
+ host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer));
+ host_bindings.offsets.push_back(offset);
+ host_bindings.sizes.push_back(binding.size);
+ }
+ if (host_bindings.buffers.size() > 0) {
+ runtime.BindTransformFeedbackBuffers(host_bindings);
}
}
@@ -1462,7 +1488,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) {
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
MappedUploadMemory(buffer, total_size_bytes, copies);
} else {
ImmediateUploadMemory(buffer, largest_copy, copies);
@@ -1473,7 +1499,7 @@ template <class P>
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] u64 largest_copy,
[[maybe_unused]] std::span<const BufferCopy> copies) {
- if constexpr (!USE_MEMORY_MAPS) {
+ if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
std::span<u8> immediate_buffer;
for (const BufferCopy& copy : copies) {
std::span<const u8> upload_span;
@@ -1532,7 +1558,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
auto& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
- if constexpr (USE_MEMORY_MAPS) {
+ if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(copy_size);
std::array copies{BufferCopy{
.src_offset = upload_staging.offset,
@@ -1618,6 +1644,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
+ bool dirty_index{false};
+ boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> dirty_vertex_buffers;
const auto scalar_replace = [buffer_id](Binding& binding) {
if (binding.buffer_id == buffer_id) {
binding.buffer_id = BufferId{};
@@ -1626,8 +1654,19 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
const auto replace = [scalar_replace](std::span<Binding> bindings) {
std::ranges::for_each(bindings, scalar_replace);
};
- scalar_replace(channel_state->index_buffer);
- replace(channel_state->vertex_buffers);
+
+ if (channel_state->index_buffer.buffer_id == buffer_id) {
+ channel_state->index_buffer.buffer_id = BufferId{};
+ dirty_index = true;
+ }
+
+ for (u32 index = 0; index < channel_state->vertex_buffers.size(); index++) {
+ auto& binding = channel_state->vertex_buffers[index];
+ if (binding.buffer_id == buffer_id) {
+ binding.buffer_id = BufferId{};
+ dirty_vertex_buffers.push_back(index);
+ }
+ }
std::ranges::for_each(channel_state->uniform_buffers, replace);
std::ranges::for_each(channel_state->storage_buffers, replace);
replace(channel_state->transform_feedback_buffers);
@@ -1644,20 +1683,21 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
slot_buffers.erase(buffer_id);
- NotifyBufferDeletion();
-}
-
-template <class P>
-void BufferCache<P>::NotifyBufferDeletion() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
}
+
auto& flags = maxwell3d->dirty.flags;
- flags[Dirty::IndexBuffer] = true;
- flags[Dirty::VertexBuffers] = true;
- for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
- flags[Dirty::VertexBuffer0 + index] = true;
+ if (dirty_index) {
+ flags[Dirty::IndexBuffer] = true;
+ }
+
+ if (dirty_vertex_buffers.size() > 0) {
+ flags[Dirty::VertexBuffers] = true;
+ for (auto index : dirty_vertex_buffers) {
+ flags[Dirty::VertexBuffer0 + index] = true;
+ }
}
channel_state->has_deleted_buffers = true;
}
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index c689fe06b..cf359e241 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -105,6 +105,15 @@ static constexpr Binding NULL_BINDING{
.buffer_id = NULL_BUFFER_ID,
};
+struct HostBindings {
+ boost::container::small_vector<void*, NUM_VERTEX_BUFFERS> buffers;
+ boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> offsets;
+ boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> sizes;
+ boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> strides;
+ u32 min_index{NUM_VERTEX_BUFFERS};
+ u32 max_index{0};
+};
+
class BufferCacheChannelInfo : public ChannelInfo {
public:
BufferCacheChannelInfo() = delete;
@@ -173,6 +182,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
@@ -518,8 +528,6 @@ private:
void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
- void NotifyBufferDeletion();
-
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
bool is_written) const;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2f986097f..62d70e9f3 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -593,6 +593,12 @@ void Maxwell3D::ProcessQueryCondition() {
}
void Maxwell3D::ProcessCounterReset() {
+#if ANDROID
+ if (!Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ return;
+ }
+#endif
switch (regs.clear_report_value) {
case Regs::ClearReport::ZPassPixelCount:
rasterizer->ResetCounter(QueryType::SamplesPassed);
@@ -614,6 +620,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
case Regs::ReportSemaphore::Report::Payload:
return regs.report_semaphore.payload;
case Regs::ReportSemaphore::Report::ZPassPixelCount64:
+#if ANDROID
+ if (!Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ return 120;
+ }
+#endif
// Deferred.
rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed,
system.GPU().GetTicks());
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 295a416a8..456f733cf 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -14,6 +14,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/perf_stats.h"
#include "video_core/cdma_pusher.h"
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3c5317777..889144f38 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -7,7 +7,7 @@
#include "common/settings.h"
#include "common/thread.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index e8761a747..2d3f58201 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -5,6 +5,7 @@
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "video_core/renderer_base.h"
namespace VideoCore {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 8d20cbece..3e12a8813 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -9,7 +9,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "core/frontend/emu_window.h"
+#include "core/frontend/framebuffer_layout.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -89,6 +89,9 @@ public:
void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout);
+ /// This is called to notify the rendering backend of a surface change
+ virtual void NotifySurfaceChanged() {}
+
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<Core::Frontend::GraphicsContext> context;
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp
index e2a189b63..be92cc2f4 100644
--- a/src/video_core/renderer_null/renderer_null.cpp
+++ b/src/video_core/renderer_null/renderer_null.cpp
@@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "video_core/renderer_null/renderer_null.h"
namespace Null {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6d3bda192..0cc546a3a 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
return views.back().texture.handle;
}
-BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
- : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, staging_buffer_pool{staging_buffer_pool_},
+ has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
}();
}
+StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
+}
+
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
+}
+
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return 2_GiB;
}
-void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
- std::span<const VideoCommon::BufferCopy> copies) {
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ if (barrier) {
+ PreCopyBarrier();
+ }
for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(
- src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
- static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
+ glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset),
+ static_cast<GLsizeiptr>(copy.size));
}
+ if (barrier) {
+ PostCopyBarrier();
+ }
+}
+
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+}
+
+void BufferCacheRuntime::PreCopyBarrier() {
+ // TODO: finer grained barrier?
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+}
+
+void BufferCacheRuntime::Finish() {
+ glFinish();
}
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
@@ -188,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
}
}
+void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) {
+ for (u32 index = 0; index < bindings.buffers.size(); index++) {
+ BindVertexBuffer(
+ bindings.min_index + index, *reinterpret_cast<Buffer*>(bindings.buffers[index]),
+ static_cast<u32>(bindings.offsets[index]), static_cast<u32>(bindings.sizes[index]),
+ static_cast<u32>(bindings.strides[index]));
+ }
+}
+
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size) {
if (use_assembly_shaders) {
@@ -276,6 +329,15 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) {
+ for (u32 index = 0; index < bindings.buffers.size(); index++) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index,
+ reinterpret_cast<Buffer*>(bindings.buffers[index])->Handle(),
+ static_cast<GLintptr>(bindings.offsets[index]),
+ static_cast<GLsizeiptr>(bindings.sizes[index]));
+ }
+}
+
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 18d3c3ac0..e4e000284 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,12 +7,12 @@
#include <span>
#include "common/common_types.h"
-#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL {
@@ -60,16 +60,34 @@ class BufferCacheRuntime {
public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
- explicit BufferCacheRuntime(const Device& device_);
+ explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
+
+ [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
+
+ [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+
+ void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+ void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void PreCopyBarrier();
+ void PostCopyBarrier();
+ void Finish();
+
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+ void BindVertexBuffers(VideoCommon::HostBindings& bindings);
void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
@@ -82,6 +100,7 @@ public:
bool is_written);
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings);
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
@@ -169,6 +188,7 @@ private:
};
const Device& device;
+ StagingBufferPool& staging_buffer_pool;
bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false;
@@ -201,7 +221,7 @@ private:
struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer;
- using Async_Buffer = u32;
+ using Async_Buffer = OpenGL::StagingBufferMap;
using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = true;
@@ -209,9 +229,12 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
- static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+
+ // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 400c21981..03d234f2f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux)) && !strict_context_required;
use_driver_cache = is_nvidia;
+ supports_conditional_barriers = !is_intel;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index cc0b95f1a..ad27264e5 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -188,6 +188,10 @@ public:
return strict_context_required;
}
+ bool SupportsConditionalBarriers() const {
+ return supports_conditional_barriers;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -233,6 +237,7 @@ private:
bool has_bool_ref_bug{};
bool can_report_memory{};
bool strict_context_required{};
+ bool supports_conditional_barriers{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f5baa0f3c..fc711c44a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
program_manager(program_manager_), state_tracker(state_tracker_),
- texture_cache_runtime(device, program_manager, state_tracker),
- texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+ texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
+ texture_cache(texture_cache_runtime, *this),
+ buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 410d8ffc5..a73ad15c1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -230,6 +230,7 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
+ StagingBufferPool staging_buffer_pool;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6ecda2984..3f077311e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
},
host_info{
+ .support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
index ca2bd8e8e..207a75d42 100644
--- a/src/video_core/renderer_opengl/gl_shader_context.h
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -4,6 +4,7 @@
#pragma once
#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
new file mode 100644
index 000000000..bbb06e51f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -0,0 +1,150 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
+
+MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192));
+
+namespace OpenGL {
+
+StagingBufferMap::~StagingBufferMap() {
+ if (sync) {
+ sync->Create();
+ }
+}
+
+StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+ : storage_flags{storage_flags_}, map_flags{map_flags_} {}
+
+StagingBuffers::~StagingBuffers() = default;
+
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+ MICROPROFILE_SCOPE(OpenGL_BufferRequest);
+
+ const size_t index = RequestBuffer(requested_size);
+ OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+ sync_indices[index] = insert_fence ? ++current_sync_index : 0;
+ return StagingBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
+}
+
+size_t StagingBuffers::RequestBuffer(size_t requested_size) {
+ if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+ return *index;
+ }
+
+ OGLBuffer& buffer = buffers.emplace_back();
+ buffer.Create();
+ const auto next_pow2_size = Common::NextPow2(requested_size);
+ glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
+ storage_flags | GL_MAP_PERSISTENT_BIT);
+ maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
+ map_flags | GL_MAP_PERSISTENT_BIT)));
+ syncs.emplace_back();
+ sync_indices.emplace_back();
+ sizes.push_back(next_pow2_size);
+
+ ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+ maps.size() == sizes.size());
+
+ return buffers.size() - 1;
+}
+
+std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
+ size_t known_unsignaled_index = current_sync_index + 1;
+ size_t smallest_buffer = std::numeric_limits<size_t>::max();
+ std::optional<size_t> found;
+ const size_t num_buffers = sizes.size();
+ for (size_t index = 0; index < num_buffers; ++index) {
+ const size_t buffer_size = sizes[index];
+ if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+ continue;
+ }
+ if (syncs[index].handle != 0) {
+ if (sync_indices[index] >= known_unsignaled_index) {
+ // This fence is later than a fence that is known to not be signaled
+ continue;
+ }
+ if (!syncs[index].IsSignaled()) {
+ // Since this fence hasn't been signaled, it's safe to assume all later
+ // fences haven't been signaled either
+ known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
+ continue;
+ }
+ syncs[index].Release();
+ }
+ smallest_buffer = buffer_size;
+ found = index;
+ }
+ return found;
+}
+
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
+ }
+}
+
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
+ }
+ used_iterator = iterator;
+
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ if (iterator + size >= free_iterator) {
+ free_iterator = iterator + size;
+ }
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
+}
+
+StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
+ return upload_buffers.RequestMap(size, true);
+}
+
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
+ return download_buffers.RequestMap(size, false);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 8fe927aaf..60f72d3a0 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -4,8 +4,10 @@
#pragma once
#include <array>
+#include <optional>
#include <span>
#include <utility>
+#include <vector>
#include <glad/glad.h>
@@ -17,6 +19,35 @@ namespace OpenGL {
using namespace Common::Literals;
+struct StagingBufferMap {
+ ~StagingBufferMap();
+
+ std::span<u8> mapped_span;
+ size_t offset = 0;
+ OGLSync* sync;
+ GLuint buffer;
+};
+
+struct StagingBuffers {
+ explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+ ~StagingBuffers();
+
+ StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+
+ size_t RequestBuffer(size_t requested_size);
+
+ std::optional<size_t> FindBuffer(size_t requested_size);
+
+ std::vector<OGLSync> syncs;
+ std::vector<OGLBuffer> buffers;
+ std::vector<u8*> maps;
+ std::vector<size_t> sizes;
+ std::vector<size_t> sync_indices;
+ GLenum storage_flags;
+ GLenum map_flags;
+ size_t current_sync_index = 0;
+};
+
class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
@@ -48,4 +79,17 @@ private:
std::array<OGLSync, NUM_SYNCS> fences;
};
+class StagingBufferPool {
+public:
+ StagingBufferPool() = default;
+ ~StagingBufferPool() = default;
+
+ StagingBufferMap RequestUploadBuffer(size_t size);
+ StagingBufferMap RequestDownloadBuffer(size_t size);
+
+private:
+ StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
+};
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
deleted file mode 100644
index 2005c8993..000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <array>
-#include <memory>
-#include <span>
-
-#include <glad/glad.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
-
-namespace OpenGL {
-
-StreamBuffer::StreamBuffer() {
- static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
- buffer.Create();
- glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
- glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
- mapped_pointer =
- static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
- for (OGLSync& sync : fences) {
- sync.Create();
- }
-}
-
-std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
- ASSERT(size < REGION_SIZE);
- for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
- ++region) {
- fences[region].Create();
- }
- used_iterator = iterator;
-
- for (size_t region = Region(free_iterator) + 1,
- region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
- region < region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- if (iterator + size >= free_iterator) {
- free_iterator = iterator + size;
- }
- if (iterator + size > STREAM_BUFFER_SIZE) {
- for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
- fences[region].Create();
- }
- used_iterator = 0;
- iterator = 0;
- free_iterator = size;
-
- for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
- glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
- fences[region].Release();
- }
- }
- const size_t offset = iterator;
- iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
- return {std::span(mapped_pointer + offset, size), offset};
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 56d0ff869..1c5dbcdd8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
}
}
-
} // Anonymous namespace
-ImageBufferMap::~ImageBufferMap() {
- if (sync) {
- sync->Create();
- }
-}
-
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
- StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
- format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
+ StateTracker& state_tracker_,
+ StagingBufferPool& staging_buffer_pool_)
+ : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
+ util_shaders(program_manager), format_conversion_pass{util_shaders},
+ resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
-ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
- return upload_buffers.RequestMap(size, true);
+StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestUploadBuffer(size);
}
-ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
- return download_buffers.RequestMap(size, false);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
is_linear ? GL_LINEAR : GL_NEAREST);
}
-void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
@@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC();
}
-TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
- : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-
-TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
-
-ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
- bool insert_fence) {
- const size_t index = RequestBuffer(requested_size);
- OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- return ImageBufferMap{
- .mapped_span = std::span(maps[index], requested_size),
- .sync = sync,
- .buffer = buffers[index].handle,
- };
-}
-
-size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
- if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
- return *index;
- }
-
- OGLBuffer& buffer = buffers.emplace_back();
- buffer.Create();
- glNamedBufferStorage(buffer.handle, requested_size, nullptr,
- storage_flags | GL_MAP_PERSISTENT_BIT);
- maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
- map_flags | GL_MAP_PERSISTENT_BIT)));
-
- syncs.emplace_back();
- sizes.push_back(requested_size);
-
- ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
- maps.size() == sizes.size());
-
- return buffers.size() - 1;
-}
-
-std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
- size_t smallest_buffer = std::numeric_limits<size_t>::max();
- std::optional<size_t> found;
- const size_t num_buffers = sizes.size();
- for (size_t index = 0; index < num_buffers; ++index) {
- const size_t buffer_size = sizes[index];
- if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
- continue;
- }
- if (syncs[index].handle != 0) {
- if (!syncs[index].IsSignaled()) {
- continue;
- }
- syncs[index].Release();
- }
- smallest_buffer = buffer_size;
- found = index;
- }
- return found;
-}
-
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
+void Image::UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
UploadMemory(map.buffer, map.offset, copies);
}
@@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
}
}
-void Image::DownloadMemory(ImageBufferMap& map,
+void Image::DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e9b3302b..1148b73d7 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -11,6 +11,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
@@ -37,15 +38,6 @@ using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
using VideoCommon::SlotVector;
-struct ImageBufferMap {
- ~ImageBufferMap();
-
- std::span<u8> mapped_span;
- size_t offset = 0;
- OGLSync* sync;
- GLuint buffer;
-};
-
struct FormatProperties {
GLenum compatibility_class;
bool compatibility_by_size;
@@ -74,14 +66,15 @@ class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
- StateTracker& state_tracker);
+ StateTracker& state_tracker,
+ StagingBufferPool& staging_buffer_pool);
~TextureCacheRuntime();
void Finish();
- ImageBufferMap UploadStagingBuffer(size_t size);
+ StagingBufferMap UploadStagingBuffer(size_t size);
- ImageBufferMap DownloadStagingBuffer(size_t size);
+ StagingBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const {
return device_access_memory;
@@ -120,7 +113,7 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+ void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@@ -149,35 +142,16 @@ public:
}
private:
- struct StagingBuffers {
- explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
- ~StagingBuffers();
-
- ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
-
- size_t RequestBuffer(size_t requested_size);
-
- std::optional<size_t> FindBuffer(size_t requested_size);
-
- std::vector<OGLSync> syncs;
- std::vector<OGLBuffer> buffers;
- std::vector<u8*> maps;
- std::vector<size_t> sizes;
- GLenum storage_flags;
- GLenum map_flags;
- };
-
const Device& device;
StateTracker& state_tracker;
+ StagingBufferPool& staging_buffer_pool;
+
UtilShaders util_shaders;
FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false;
- StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
-
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
@@ -213,7 +187,7 @@ public:
void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map,
+ void UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@@ -222,7 +196,8 @@ public:
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
+ void DownloadMemory(StagingBufferMap& map,
+ std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 2c7ac210b..544982d18 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -19,6 +19,7 @@
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
+void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
+void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 9013808e7..feecd404c 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -16,23 +16,23 @@ namespace OpenGL {
class Image;
class ProgramManager;
-struct ImageBufferMap;
+struct StagingBufferMap;
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
- void ASTCDecode(Image& image, const ImageBufferMap& map,
+ void ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+ void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void PitchUpload(Image& image, const ImageBufferMap& map,
+ void PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index b75d7220d..9a0b10568 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -347,6 +347,14 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size) {
+ if (device.MustEmulateScaledFormats()) {
+ if (type == Maxwell::VertexAttribute::Type::SScaled) {
+ type = Maxwell::VertexAttribute::Type::SInt;
+ } else if (type == Maxwell::VertexAttribute::Type::UScaled) {
+ type = Maxwell::VertexAttribute::Type::UInt;
+ }
+ }
+
const VkFormat format{([&]() {
switch (type) {
case Maxwell::VertexAttribute::Type::UnusedEnumDoNotUseBecauseItWillGoAway:
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 8e31eba34..77128c6e2 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -16,7 +16,7 @@
#include "common/settings.h"
#include "common/telemetry.h"
#include "core/core_timing.h"
-#include "core/frontend/emu_window.h"
+#include "core/frontend/graphics_context.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -84,8 +84,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
- cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
- instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+ cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
+ instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window.GetWindowInfo())),
@@ -93,7 +93,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
- present_manager(render_window, device, memory_allocator, scheduler, swapchain),
+ present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
+ surface),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
scheduler, screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index f44367cb2..b2e8cbd1b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -54,6 +54,10 @@ public:
return device.GetDriverName();
}
+ void NotifySurfaceChanged() override {
+ present_manager.NotifySurfaceChanged();
+ }
+
private:
void Report() const;
@@ -63,7 +67,7 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
- Common::DynamicLibrary library;
+ std::shared_ptr<Common::DynamicLibrary> library;
vk::InstanceDispatch dld;
vk::Instance instance;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 1e0fdd3d9..acb143fc7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -74,7 +74,7 @@ struct ScreenRectVertex {
}
};
-constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
+std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
// clang-format off
return { 2.f / width, 0.f, 0.f, 0.f,
0.f, 2.f / height, 0.f, 0.f,
@@ -441,7 +441,12 @@ void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& f
if (const std::size_t swapchain_images = swapchain.GetImageCount();
swapchain_images != image_count || current_srgb != is_srgb) {
current_srgb = is_srgb;
+#ifdef ANDROID
+ // Android is already ordered the same as Switch.
+ image_view_format = current_srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
+#else
image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
+#endif
image_count = swapchain_images;
Recreate();
}
@@ -1107,7 +1112,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
- .format = GetFormat(framebuffer),
+ .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer),
.extent =
{
.width = (up_scale * framebuffer.width) >> down_shift,
@@ -1128,14 +1133,14 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const auto create_commit = [&](vk::Image& image) {
return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
};
- const auto create_image_view = [&](vk::Image& image) {
+ const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = GetFormat(framebuffer),
+ .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer),
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1165,7 +1170,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
aa_commit = create_commit(aa_image);
- aa_image_view = create_image_view(aa_image);
+ aa_image_view = create_image_view(aa_image, true);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
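Note: the hunks above widen the intermediate image used by the scaling/anti-aliasing passes: when the raw image is also rendered to, it is created with a 16-bit float format instead of the guest framebuffer format. A minimal sketch of that choice, as a plain function instead of the lambdas' default argument (names here are illustrative, not yuzu's):

#include <vulkan/vulkan.h>

// Sketch only: pick the format for a "raw" blit image. Images that later serve as
// render targets for the AA pass get a wide float format; everything else keeps
// whatever GetFormat(framebuffer) resolved to in the real code.
VkFormat ChooseRawImageFormat(bool used_on_framebuffer, VkFormat framebuffer_format) {
    return used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : framebuffer_format;
}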
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 9627eb129..d72d99899 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,7 +7,6 @@
#include <span>
#include <vector>
-#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -303,9 +302,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue),
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) {
+ if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
+ uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
+ compute_pass_descriptor_queue);
+ }
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
scheduler_, staging_pool_);
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@@ -442,7 +445,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
topology == PrimitiveTopology::QuadStrip);
} else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
vk_index_type = VK_INDEX_TYPE_UINT16;
- std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ if (uint8_pass) {
+ std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset);
+ }
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
@@ -496,6 +501,40 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
}
}
+void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) {
+ boost::container::small_vector<VkBuffer, 32> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); index++) {
+ auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]);
+ auto handle = buffer.Handle();
+ if (handle == VK_NULL_HANDLE) {
+ bindings.offsets[index] = 0;
+ bindings.sizes[index] = VK_WHOLE_SIZE;
+ if (!device.HasNullDescriptor()) {
+ ReserveNullBuffer();
+ handle = *null_buffer;
+ }
+ }
+ buffer_handles.push_back(handle);
+ }
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([bindings = bindings,
+ buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers2EXT(
+ bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
+ reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()),
+ reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data()),
+ reinterpret_cast<const VkDeviceSize*>(bindings.strides.data()));
+ });
+ } else {
+ scheduler.Record([bindings = bindings,
+ buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffers(
+ bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
+ reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()));
+ });
+ }
+}
+
void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
u32 size) {
if (!device.IsExtTransformFeedbackSupported()) {
@@ -517,6 +556,25 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
+void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ boost::container::small_vector<VkBuffer, 4> buffer_handles;
+ for (u32 index = 0; index < bindings.buffers.size(); index++) {
+ auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]);
+ buffer_handles.push_back(buffer.Handle());
+ }
+ scheduler.Record(
+ [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindTransformFeedbackBuffersEXT(
+ 0, static_cast<u32>(buffer_handles.size()), buffer_handles.data(),
+ reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()),
+ reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data()));
+ });
+}
+
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
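Note: BindVertexBuffers above batches all dirty bindings into one command, using vkCmdBindVertexBuffers2EXT when VK_EXT_extended_dynamic_state is available (so sizes and strides travel with the bind) and the core entry point otherwise. A stripped-down sketch with raw Vulkan handles, assuming the extension entry point has already been loaded by the dispatch loader:

#include <cstdint>
#include <vulkan/vulkan.h>

// Sketch, not yuzu's wrapper types: bind a contiguous range of vertex buffers.
void BindVertexRange(VkCommandBuffer cmdbuf, bool has_extended_dynamic_state,
                     uint32_t first_binding, uint32_t binding_count,
                     const VkBuffer* buffers, const VkDeviceSize* offsets,
                     const VkDeviceSize* sizes, const VkDeviceSize* strides) {
    if (has_extended_dynamic_state) {
        // Extended dynamic state: sizes and strides are part of the bind itself.
        vkCmdBindVertexBuffers2EXT(cmdbuf, first_binding, binding_count, buffers,
                                   offsets, sizes, strides);
    } else {
        // Core fallback: strides come from the currently bound pipeline instead.
        vkCmdBindVertexBuffers(cmdbuf, first_binding, binding_count, buffers, offsets);
    }
}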
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 5e9602905..92d3e9f32 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -18,6 +18,7 @@ namespace Vulkan {
class Device;
class DescriptorPool;
class Scheduler;
+struct HostVertexBinding;
class BufferCacheRuntime;
@@ -96,8 +97,10 @@ public:
void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count);
void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
+ void BindVertexBuffers(VideoCommon::HostBindings& bindings);
void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+ void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings);
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
[[maybe_unused]] u32 binding_index, u32 size) {
@@ -139,7 +142,7 @@ private:
vk::Buffer null_buffer;
MemoryCommit null_buffer_commit;
- Uint8Pass uint8_pass;
+ std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
};
@@ -157,6 +160,7 @@ struct BufferCacheParams {
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
+ static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 66dfe5733..5734f51e5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -114,14 +114,16 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut
return Shader::AttributeType::Disabled;
case Maxwell::VertexAttribute::Type::SNorm:
case Maxwell::VertexAttribute::Type::UNorm:
- case Maxwell::VertexAttribute::Type::UScaled:
- case Maxwell::VertexAttribute::Type::SScaled:
case Maxwell::VertexAttribute::Type::Float:
return Shader::AttributeType::Float;
case Maxwell::VertexAttribute::Type::SInt:
return Shader::AttributeType::SignedInt;
case Maxwell::VertexAttribute::Type::UInt:
return Shader::AttributeType::UnsignedInt;
+ case Maxwell::VertexAttribute::Type::UScaled:
+ return Shader::AttributeType::UnsignedScaled;
+ case Maxwell::VertexAttribute::Type::SScaled:
+ return Shader::AttributeType::SignedScaled;
}
return Shader::AttributeType::Float;
}
@@ -286,14 +288,17 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
- workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
+ workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY
+ ? 1
+ : (std::max(std::thread::hardware_concurrency(), 2U) - 1),
+ "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
const auto& float_control{device.FloatControlProperties()};
const VkDriverId driver_id{device.GetDriverID()};
profile = Shader::Profile{
.supported_spirv = device.SupportedSpirvVersion(),
.unified_descriptor_binding = true,
- .support_descriptor_aliasing = true,
+ .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(),
.support_int8 = device.IsInt8Supported(),
.support_int16 = device.IsShaderInt16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
@@ -324,6 +329,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.support_derivative_control = true,
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_native_ndc = device.IsExtDepthClipControlSupported(),
+ .support_scaled_attributes = !device.MustEmulateScaledFormats(),
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
@@ -341,8 +347,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
- };
+ .has_broken_spirv_subgroup_mask_vector_extract_dynamic =
+ driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
host_info = Shader::HostTranslateInfo{
+ .support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
.needs_demote_reorder =
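Note: the constructor change above clamps the pipeline-builder worker pool to a single thread on the Qualcomm proprietary driver and otherwise keeps the old hardware_concurrency-minus-one heuristic. A small sketch of that policy, with the driver check reduced to a boolean parameter:

#include <algorithm>
#include <cstdint>
#include <thread>

// Sketch of the worker-count policy; yuzu derives the flag from Device::GetDriverID().
uint32_t PipelineBuilderWorkers(bool is_qualcomm_proprietary) {
    if (is_qualcomm_proprietary) {
        return 1;  // Single builder thread on this driver.
    }
    // Leave one core free for the rest of the emulator, but never drop below one worker.
    return std::max(std::thread::hardware_concurrency(), 2U) - 1;
}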
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index c49583013..10ace0420 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -4,10 +4,12 @@
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
+#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
namespace Vulkan {
@@ -92,14 +94,17 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat form
} // Anonymous namespace
-PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
+PresentManager::PresentManager(const vk::Instance& instance_,
+ Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
- Swapchain& swapchain_)
- : render_window{render_window_}, device{device_},
+ Swapchain& swapchain_, vk::SurfaceKHR& surface_)
+ : instance{instance_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
- blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
+ surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(),
+ swapchain.GetImageViewFormat())},
use_present_thread{Settings::values.async_presentation.GetValue()},
- image_count{swapchain.GetImageCount()} {
+ image_count{swapchain.GetImageCount()}, last_render_surface{
+ render_window_.GetWindowInfo().render_surface} {
auto& dld = device.GetLogical();
cmdpool = dld.CreateCommandPool({
@@ -286,14 +291,45 @@ void PresentManager::PresentThread(std::stop_token token) {
}
}
+void PresentManager::NotifySurfaceChanged() {
+#ifdef ANDROID
+ std::scoped_lock lock{recreate_surface_mutex};
+ recreate_surface_cv.notify_one();
+#endif
+}
+
void PresentManager::CopyToSwapchain(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
const auto recreate_swapchain = [&] {
- swapchain.Create(frame->width, frame->height, frame->is_srgb);
+ swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
image_count = swapchain.GetImageCount();
};
+#ifdef ANDROID
+ std::unique_lock lock{recreate_surface_mutex};
+
+ const auto needs_recreation = [&] {
+ if (last_render_surface != render_window.GetWindowInfo().render_surface) {
+ return true;
+ }
+ if (swapchain.NeedsRecreation(frame->is_srgb)) {
+ return true;
+ }
+ return false;
+ };
+
+ recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400),
+ [&]() { return !needs_recreation(); });
+
+ // If the frontend recreated the surface, recreate the renderer surface and swapchain.
+ if (last_render_surface != render_window.GetWindowInfo().render_surface) {
+ last_render_surface = render_window.GetWindowInfo().render_surface;
+ surface = CreateSurface(instance, render_window.GetWindowInfo());
+ recreate_swapchain();
+ }
+#endif
+
// If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that.
const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
@@ -436,7 +472,7 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
// Submit the image copy/blit to the swapchain
{
- std::scoped_lock lock{scheduler.submit_mutex};
+ std::scoped_lock submit_lock{scheduler.submit_mutex};
switch (const VkResult result =
device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
case VK_SUCCESS:
@@ -454,4 +490,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
swapchain.Present(render_semaphore);
}
-} // namespace Vulkan
+} // namespace Vulkan
\ No newline at end of file
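Note: on Android the frontend can tear down and recreate the native window at any time, so NotifySurfaceChanged pokes a condition variable that CopyToSwapchain waits on (capped at 400 ms) before deciding whether to rebuild the VkSurfaceKHR and swapchain. A standalone sketch of that notify/wait pairing, with the surface reduced to an opaque pointer and the predicate simplified relative to the diff:

#include <chrono>
#include <condition_variable>
#include <mutex>

// Illustrative only; the real code compares render_window.GetWindowInfo().render_surface
// and recreates the surface-dependent Vulkan objects when it changed.
class SurfaceWatcher {
public:
    void NotifySurfaceChanged(void* new_surface) {
        std::scoped_lock lock{mutex};
        current_surface = new_surface;
        cv.notify_one();
    }

    // Returns true when the caller should recreate the surface and swapchain.
    bool CheckForNewSurface() {
        std::unique_lock lock{mutex};
        // Wake early if the frontend publishes a new surface, otherwise give up
        // after 400 ms so presentation never stalls indefinitely.
        cv.wait_for(lock, std::chrono::milliseconds(400),
                    [&] { return current_surface != last_surface; });
        if (current_surface == last_surface) {
            return false;
        }
        last_surface = current_surface;
        return true;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    void* current_surface{};
    void* last_surface{};
};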
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 420a775e2..4ac2e2395 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -37,8 +37,9 @@ struct Frame {
class PresentManager {
public:
- PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
- MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
+ PresentManager(const vk::Instance& instance, Core::Frontend::EmuWindow& render_window,
+ const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler,
+ Swapchain& swapchain, vk::SurfaceKHR& surface);
~PresentManager();
/// Returns the last used presentation frame
@@ -54,30 +55,38 @@ public:
/// Waits for the present thread to finish presenting all queued frames.
void WaitPresent();
+ /// This is called to notify the rendering backend of a surface change
+ void NotifySurfaceChanged();
+
private:
void PresentThread(std::stop_token token);
void CopyToSwapchain(Frame* frame);
private:
+ const vk::Instance& instance;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Swapchain& swapchain;
+ vk::SurfaceKHR& surface;
vk::CommandPool cmdpool;
std::vector<Frame> frames;
std::queue<Frame*> present_queue;
std::queue<Frame*> free_queue;
std::condition_variable_any frame_cv;
std::condition_variable free_cv;
+ std::condition_variable recreate_surface_cv;
std::mutex swapchain_mutex;
+ std::mutex recreate_surface_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
bool blit_supported;
bool use_present_thread;
- std::size_t image_count;
+ std::size_t image_count{};
+ void* last_render_surface{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 8d3a9736b..84e3a30cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -188,7 +188,14 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache.UpdateCounters();
+ }
+#else
query_cache.UpdateCounters();
+#endif
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
@@ -272,7 +279,14 @@ void RasterizerVulkan::DrawTexture() {
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache.UpdateCounters();
+ }
+#else
query_cache.UpdateCounters();
+#endif
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -743,7 +757,11 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load
}
void RasterizerVulkan::FlushWork() {
+#ifdef ANDROID
+ static constexpr u32 DRAWS_TO_DISPATCH = 1024;
+#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
+#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
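Note: FlushWork above lowers the dispatch threshold from 4096 to 1024 draws on Android, and the static_assert documents that the counter is only inspected every 8 draws. A hypothetical counter illustrating that pattern (not yuzu's actual FlushWork body):

#include <cstdint>

class DrawFlushCounter {
public:
    explicit DrawFlushCounter(uint32_t draws_to_dispatch) : threshold{draws_to_dispatch} {}

    // Returns true when the caller should flush pending GPU work.
    bool OnDraw() {
        if ((++draw_counter & 7) != 0) {
            return false;  // Only evaluate the threshold on every 8th draw.
        }
        if (draw_counter < threshold) {
            return false;
        }
        draw_counter = 0;
        return true;
    }

private:
    uint32_t threshold;      // 1024 on Android, 4096 elsewhere in the hunk above.
    uint32_t draw_counter{};
};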
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 80455ec08..17ef61147 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -239,7 +239,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache->UpdateCounters();
+ }
+#else
query_cache->UpdateCounters();
+#endif
}
}
@@ -250,7 +257,14 @@ void Scheduler::InvalidateState() {
}
void Scheduler::EndPendingOperations() {
+#if ANDROID
+ if (Settings::IsGPULevelHigh()) {
+ // This is problematic on Android, disable on GPU Normal.
+ query_cache->DisableStreams();
+ }
+#else
query_cache->DisableStreams();
+#endif
EndRenderPass();
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 8c0dec590..d3cddac69 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -107,16 +107,17 @@ VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& cap
Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
u32 width_, u32 height_, bool srgb)
: surface{surface_}, device{device_}, scheduler{scheduler_} {
- Create(width_, height_, srgb);
+ Create(surface_, width_, height_, srgb);
}
Swapchain::~Swapchain() = default;
-void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
+void Swapchain::Create(VkSurfaceKHR surface_, u32 width_, u32 height_, bool srgb) {
is_outdated = false;
is_suboptimal = false;
width = width_;
height = height_;
+ surface = surface_;
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
@@ -230,7 +231,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
+#ifdef ANDROID
+ // On Android, do not allow surface rotation to deviate from the frontend.
+ .preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+#else
.preTransform = capabilities.currentTransform,
+#endif
.compositeAlpha = alpha_flags,
.presentMode = present_mode,
.clipped = VK_FALSE,
@@ -266,7 +272,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
+#ifdef ANDROID
+ // Android is already ordered the same as Switch.
+ image_view_format = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
+#else
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
+#endif
}
void Swapchain::CreateSemaphores() {
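Note: two Android-specific choices are made when (re)creating the swapchain above: the pre-transform is pinned to identity because the frontend already applies rotation, and the image view format is RGBA rather than BGRA ordered. Sketched as plain helpers with a runtime flag instead of #ifdef ANDROID:

#include <vulkan/vulkan.h>

VkSurfaceTransformFlagBitsKHR ChoosePreTransform(bool is_android,
                                                 const VkSurfaceCapabilitiesKHR& caps) {
    // On Android the frontend handles rotation, so the swapchain must not rotate again.
    return is_android ? VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR : caps.currentTransform;
}

VkFormat ChooseImageViewFormat(bool is_android, bool srgb) {
    if (is_android) {
        // RGBA channel order matches the Switch framebuffer on Android surfaces.
        return srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM;
    }
    return srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}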
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index bf1ea7254..b8a1465a6 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -24,7 +24,7 @@ public:
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
- void Create(u32 width, u32 height, bool srgb);
+ void Create(VkSurfaceKHR surface, u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
bool AcquireNextImage();
@@ -118,7 +118,7 @@ private:
bool NeedsPresentModeUpdate() const;
- const VkSurfaceKHR surface;
+ VkSurfaceKHR surface;
const Device& device;
Scheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index db04943eb..a802d3c49 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -1,6 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+#include <adrenotools/driver.h>
+#endif
+
#include "common/literals.h"
#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -13,7 +17,10 @@ namespace Vulkan {
using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
- : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
+#ifndef ANDROID
+ : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false}
+#endif
+{
{
std::scoped_lock lk{m_submission_lock};
m_submission_time = std::chrono::steady_clock::now();
@@ -30,6 +37,7 @@ void TurboMode::QueueSubmitted() {
}
void TurboMode::Run(std::stop_token stop_token) {
+#ifndef ANDROID
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
@@ -142,8 +150,14 @@ void TurboMode::Run(std::stop_token stop_token) {
// Create a single command buffer.
auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
+#endif
while (!stop_token.stop_requested()) {
+#ifdef ANDROID
+#ifdef ARCHITECTURE_arm64
+ adrenotools_set_turbo(true);
+#endif
+#else
// Reset the fence.
fence.Reset();
@@ -209,7 +223,7 @@ void TurboMode::Run(std::stop_token stop_token) {
// Wait for completion.
fence.Wait();
-
+#endif
// Wait for the next graphics queue submission if necessary.
std::unique_lock lk{m_submission_lock};
Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
@@ -217,6 +231,9 @@ void TurboMode::Run(std::stop_token stop_token) {
std::chrono::milliseconds{100};
});
}
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+ adrenotools_set_turbo(false);
+#endif
}
} // namespace Vulkan
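Note: on Android the turbo thread no longer spins a dummy compute dispatch; it asks the Adreno driver for high clocks via adrenotools and sleeps until the next queue submission. A reduced sketch of that pacing loop in standard C++20, with a placeholder SetTurbo() standing in for adrenotools_set_turbo() and the Vulkan busy-work:

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <stop_token>

void SetTurbo(bool /*enabled*/) {}  // placeholder for the platform-specific call

void TurboLoop(std::stop_token stop_token, std::mutex& submission_lock,
               std::condition_variable_any& submission_cv,
               const std::chrono::steady_clock::time_point& submission_time) {
    while (!stop_token.stop_requested()) {
        SetTurbo(true);
        // Wait until another queue submission happened recently (or a stop is
        // requested), so clocks are only boosted while the GPU is being fed.
        std::unique_lock lk{submission_lock};
        submission_cv.wait(lk, stop_token, [&] {
            return std::chrono::steady_clock::now() - submission_time <=
                   std::chrono::milliseconds{100};
        });
    }
    SetTurbo(false);
}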
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h
index 99b5ac50b..9341c9867 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.h
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h
@@ -23,8 +23,10 @@ public:
private:
void Run(std::stop_token stop_token);
+#ifndef ANDROID
Device m_device;
MemoryAllocator m_allocator;
+#endif
std::mutex m_submission_lock;
std::condition_variable_any m_submission_cv;
std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 310fb551a..e77b576ec 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -31,7 +31,7 @@ struct DescriptorUpdateEntry {
class UpdateDescriptorQueue final {
// This should be plenty for the vast majority of cases. Most desktop platforms only
// provide up to 3 swapchain images.
- static constexpr size_t FRAMES_IN_FLIGHT = 5;
+ static constexpr size_t FRAMES_IN_FLIGHT = 7;
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000;
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
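Note: raising FRAMES_IN_FLIGHT from 5 to 7 grows the descriptor-update payload accordingly; a quick check of the resulting size, assuming the constants shown above:

#include <cstddef>

constexpr std::size_t FRAMES_IN_FLIGHT = 7;
constexpr std::size_t FRAME_PAYLOAD_SIZE = 0x20000;  // 128 KiB per frame in flight
constexpr std::size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
static_assert(PAYLOAD_SIZE == 0xE0000);              // 917504 bytes, i.e. 896 KiB total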
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index e8ddde691..b72788c6d 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -22,6 +22,9 @@ using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
+constexpr u32 RescaleHeightThreshold = 288;
+constexpr u32 DownscaleHeightThreshold = 512;
+
ImageInfo::ImageInfo(const TICEntry& config) noexcept {
forced_flushed = config.IsPitchLinear() && !Settings::values.use_reactive_flushing.GetValue();
dma_downloaded = forced_flushed;
@@ -113,8 +116,9 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
layer_stride = CalculateLayerStride(*this);
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
rescaleable &= (block.depth == 0) && resources.levels == 1;
- rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture;
- downscaleable = size.height > 512;
+ rescaleable &= size.height > RescaleHeightThreshold ||
+ GetFormatType(format) != SurfaceType::ColorTexture;
+ downscaleable = size.height > DownscaleHeightThreshold;
}
}
@@ -152,8 +156,8 @@ ImageInfo::ImageInfo(const Maxwell3D::Regs::RenderTargetConfig& ct,
size.depth = ct.depth;
} else {
rescaleable = block.depth == 0;
- rescaleable &= size.height > 256;
- downscaleable = size.height > 512;
+ rescaleable &= size.height > RescaleHeightThreshold;
+ downscaleable = size.height > DownscaleHeightThreshold;
type = ImageType::e2D;
resources.layers = ct.depth;
}
@@ -232,8 +236,8 @@ ImageInfo::ImageInfo(const Fermi2D::Surface& config) noexcept {
.height = config.height,
.depth = 1,
};
- rescaleable = block.depth == 0 && size.height > 256;
- downscaleable = size.height > 512;
+ rescaleable = block.depth == 0 && size.height > RescaleHeightThreshold;
+ downscaleable = size.height > DownscaleHeightThreshold;
}
}
@@ -275,8 +279,8 @@ ImageInfo::ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept {
resources.layers = 1;
layer_stride = CalculateLayerStride(*this);
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
- rescaleable = block.depth == 0 && size.height > 256;
- downscaleable = size.height > 512;
+ rescaleable = block.depth == 0 && size.height > RescaleHeightThreshold;
+ downscaleable = size.height > DownscaleHeightThreshold;
}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2cf082c5d..c7f7448e9 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -850,15 +850,11 @@ void TextureCache<P>::PopAsyncFlushes() {
template <class P>
ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) {
const ImageInfo dst_info(operand);
- const ImageId dst_id = FindDMAImage(dst_info, operand.address);
- if (!dst_id) {
- return NULL_IMAGE_ID;
- }
- auto& image = slot_images[dst_id];
- if (False(image.flags & ImageFlagBits::GpuModified)) {
- // No need to waste time on an image that's synced with guest
+ const ImageId image_id = FindDMAImage(dst_info, operand.address);
+ if (!image_id) {
return NULL_IMAGE_ID;
}
+ auto& image = slot_images[image_id];
if (!is_upload && !image.info.dma_downloaded) {
// Force a full sync.
image.info.dma_downloaded = true;
@@ -868,7 +864,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
if (!base) {
return NULL_IMAGE_ID;
}
- return dst_id;
+ return image_id;
}
template <class P>
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
index 10a001b8f..9de484c29 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -13,11 +13,39 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
[[maybe_unused]] void* user_data) {
// Skip logging known false-positive validation errors
switch (static_cast<u32>(data->messageIdNumber)) {
+#ifdef ANDROID
+ case 0xbf9cf353u: // VUID-vkCmdBindVertexBuffers2-pBuffers-04111
+ // The below are due to incorrect reporting of extendedDynamicState
+ case 0x1093bebbu: // VUID-vkCmdSetCullMode-None-03384
+ case 0x9215850fu: // VUID-vkCmdSetDepthTestEnable-None-03352
+ case 0x86bf18dcu: // VUID-vkCmdSetDepthWriteEnable-None-03354
+ case 0x0792ad08u: // VUID-vkCmdSetStencilOp-None-03351
+ case 0x93e1ba4eu: // VUID-vkCmdSetFrontFace-None-03383
+ case 0xac9c13c5u: // VUID-vkCmdSetStencilTestEnable-None-03350
+ case 0xc9a2001bu: // VUID-vkCmdSetDepthBoundsTestEnable-None-03349
+ case 0x8b7159a7u: // VUID-vkCmdSetDepthCompareOp-None-03353
+ // The below are due to incorrect reporting of extendedDynamicState2
+ case 0xb13c8036u: // VUID-vkCmdSetDepthBiasEnable-None-04872
+ case 0xdff2e5c1u: // VUID-vkCmdSetRasterizerDiscardEnable-None-04871
+ case 0x0cc85f41u: // VUID-vkCmdSetPrimitiveRestartEnable-None-04866
+ case 0x01257b492u: // VUID-vkCmdSetLogicOpEXT-None-04867
+ // The below are due to incorrect reporting of vertexInputDynamicState
+ case 0x398e0dabu: // VUID-vkCmdSetVertexInputEXT-None-04790
+ // The below are due to incorrect reporting of extendedDynamicState3
+ case 0x970c11a5u: // VUID-vkCmdSetColorWriteMaskEXT-extendedDynamicState3ColorWriteMask-07364
+ case 0x6b453f78u: // VUID-vkCmdSetColorBlendEnableEXT-extendedDynamicState3ColorBlendEnable-07355
+ case 0xf66469d0u: // VUID-vkCmdSetColorBlendEquationEXT-extendedDynamicState3ColorBlendEquation-07356
+ case 0x1d43405eu: // VUID-vkCmdSetLogicOpEnableEXT-extendedDynamicState3LogicOpEnable-07365
+ case 0x638462e8u: // VUID-vkCmdSetDepthClampEnableEXT-extendedDynamicState3DepthClampEnable-07448
+ // Misc
+ case 0xe0a2da61u: // VUID-vkCmdDrawIndexed-format-07753
+#else
case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter
case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0])
case 0xe8616bf2u: // Bound VkDescriptorSet 0x0[] was destroyed. Likely push_descriptor related
case 0x1608dec0u: // Image layout in vkUpdateDescriptorSet doesn't match descriptor use
case 0x55362756u: // Descriptor binding and framebuffer attachment overlap
+#endif
return VK_FALSE;
default:
break;
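Note: the callback above filters validation messages by messageIdNumber, with a much longer ignore list on Android where several dynamic-state features are reported incorrectly by the driver. A minimal standalone callback showing the same filtering idea (the two ids are examples copied from the desktop list above):

#include <cstdint>
#include <vulkan/vulkan.h>

static VKAPI_ATTR VkBool32 VKAPI_CALL DebugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT /*severity*/,
    VkDebugUtilsMessageTypeFlagsEXT /*type*/,
    const VkDebugUtilsMessengerCallbackDataEXT* data, void* /*user_data*/) {
    static constexpr uint32_t ignored_ids[]{
        0x682a878au,  // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter
        0xe8616bf2u,  // Bound VkDescriptorSet was destroyed (push_descriptor related)
    };
    const auto id = static_cast<uint32_t>(data->messageIdNumber);
    for (const uint32_t ignored : ignored_ids) {
        if (id == ignored) {
            return VK_FALSE;  // Known false positive: skip logging entirely.
        }
    }
    // ...log data->pMessage at a level derived from the severity here...
    return VK_FALSE;  // Always VK_FALSE: never abort the offending Vulkan call.
}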
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index aea677cb3..a46f9beed 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -18,6 +18,10 @@
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+#include <adrenotools/bcenabler.h>
+#endif
+
namespace Vulkan {
using namespace Common::Literals;
namespace {
@@ -262,6 +266,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
return format_properties;
}
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) {
+ // These properties are extracted from Adreno driver 512.687.0
+ constexpr VkFormatFeatureFlags tiling_features{
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
+
+ constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT};
+
+ static constexpr std::array bcn_formats{
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK,
+ };
+
+ for (const auto format : bcn_formats) {
+ format_properties[format].linearTilingFeatures = tiling_features;
+ format_properties[format].optimalTilingFeatures = tiling_features;
+ format_properties[format].bufferFeatures = buffer_features;
+ }
+}
+#endif
+
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
const std::set<std::string, std::less<>>& exts) {
if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
@@ -302,6 +332,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_suitable = GetSuitability(surface != nullptr);
const VkDriverId driver_id = properties.driver.driverID;
+ const auto device_id = properties.properties.deviceID;
const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
const bool is_amd_driver =
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
@@ -310,9 +341,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
+ const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
+ const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
+ const bool is_s8gen2 = device_id == 0x43050a01;
- if (is_mvk && !is_suitable) {
- LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway");
+ if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) {
+ LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway");
} else if (!is_suitable) {
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
@@ -352,9 +386,64 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
+ supports_conditional_barriers = !(is_intel_anv || is_intel_windows);
+
CollectPhysicalMemoryInfo();
CollectToolingInfo();
+#ifdef ANDROID
+ if (is_qualcomm || is_turnip) {
+ LOG_WARNING(Render_Vulkan,
+ "Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color");
+ extensions.custom_border_color = false;
+ loaded_extensions.erase(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+ }
+
+ if (is_qualcomm) {
+ must_emulate_scaled_formats = true;
+
+ LOG_WARNING(Render_Vulkan, "Qualcomm drivers have broken VK_EXT_extended_dynamic_state");
+ extensions.extended_dynamic_state = false;
+ loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+
+ LOG_WARNING(Render_Vulkan,
+ "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
+ extensions.push_descriptor = false;
+ loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
+
+#ifdef ARCHITECTURE_arm64
+ // Patch the driver to enable BCn textures.
+ const auto major = (properties.properties.driverVersion >> 24) << 2;
+ const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU;
+ const auto vendor = properties.properties.vendorID;
+ const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor);
+
+ if (patch_status == ADRENOTOOLS_BCN_PATCH) {
+ LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats");
+ if (adrenotools_patch_bcn(
+ reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) {
+ OverrideBcnFormats(format_properties);
+ } else {
+ LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash");
+ }
+ } else if (patch_status == ADRENOTOOLS_BCN_BLOB) {
+ LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches");
+ } else {
+ LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures");
+ }
+#endif // ARCHITECTURE_arm64
+ }
+
+ const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
+ if (is_arm) {
+ must_emulate_scaled_formats = true;
+
+ LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state");
+ extensions.extended_dynamic_state = false;
+ loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+ }
+#endif // ANDROID
+
if (is_nvidia) {
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
@@ -388,7 +477,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
}
}
- if (extensions.extended_dynamic_state2 && is_radv) {
+ if (extensions.extended_dynamic_state2 && (is_radv || is_qualcomm)) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
LOG_WARNING(
@@ -415,7 +504,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
dynamic_state3_enables = false;
}
}
- if (extensions.vertex_input_dynamic_state && is_radv) {
+ if (extensions.vertex_input_dynamic_state && (is_radv || is_qualcomm)) {
+ // Qualcomm S8gen2 drivers do not properly support vertex_input_dynamic_state.
// TODO(ameerj): Blacklist only offending driver versions
// TODO(ameerj): Confirm if RDNA1 is affected
const bool is_rdna2 =
@@ -467,8 +557,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
- if (is_intel_anv) {
- LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format");
+ if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
+ LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = true;
}
if (extensions.push_descriptor && is_intel_anv) {
@@ -633,7 +723,8 @@ bool Device::ShouldBoostClocks() const {
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
- driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
+ driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA ||
+ driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP;
const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
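Note: before asking adrenotools whether the BCn patch applies, the code above unpacks the Adreno driver version from VkPhysicalDeviceProperties::driverVersion using a vendor-specific field layout. A small sketch of just that decoding step (the layout is the one assumed by the diff, not a Vulkan-defined encoding):

#include <cstdint>

struct AdrenoDriverVersion {
    uint32_t major;
    uint32_t minor;
};

constexpr AdrenoDriverVersion DecodeAdrenoDriverVersion(uint32_t driver_version) {
    return {
        .major = (driver_version >> 24) << 2,
        .minor = (driver_version >> 12) & 0xFFFU,
    };
}

// The decoded pair plus vendorID feeds adrenotools_get_bcn_type(major, minor, vendor) above.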
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 5f1c63ff9..f314d0ffe 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -85,7 +85,6 @@
// Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
- EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
@@ -105,6 +104,7 @@
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
+ EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \
EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \
@@ -141,9 +141,6 @@
FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \
FEATURE_NAME(features, wideLines) \
FEATURE_NAME(host_query_reset, hostQueryReset) \
- FEATURE_NAME(robustness2, nullDescriptor) \
- FEATURE_NAME(robustness2, robustBufferAccess2) \
- FEATURE_NAME(robustness2, robustImageAccess2) \
FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
FEATURE_NAME(variable_pointer, variablePointers) \
@@ -156,6 +153,9 @@
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
+ FEATURE_NAME(robustness2, nullDescriptor) \
+ FEATURE_NAME(robustness2, robustBufferAccess2) \
+ FEATURE_NAME(robustness2, robustImageAccess2) \
FEATURE_NAME(shader_float16_int8, shaderFloat16) \
FEATURE_NAME(shader_float16_int8, shaderInt8) \
FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
@@ -295,6 +295,16 @@ public:
return features.features.textureCompressionASTC_LDR;
}
+ /// Returns true if descriptor aliasing is natively supported.
+ bool IsDescriptorAliasingSupported() const {
+ return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
+ }
+
+ /// Returns true if the device supports float64 natively.
+ bool IsFloat64Supported() const {
+ return features.features.shaderFloat64;
+ }
+
/// Returns true if the device supports float16 natively.
bool IsFloat16Supported() const {
return features.shader_float16_int8.shaderFloat16;
@@ -495,6 +505,10 @@ public:
}
bool HasTimelineSemaphore() const {
+ if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ // Timeline semaphores do not work properly on all Qualcomm drivers.
+ return false;
+ }
return features.timeline_semaphore.timelineSemaphore;
}
@@ -551,6 +565,10 @@ public:
return cant_blit_msaa;
}
+ bool MustEmulateScaledFormats() const {
+ return must_emulate_scaled_formats;
+ }
+
bool MustEmulateBGR565() const {
return must_emulate_bgr565;
}
@@ -567,6 +585,10 @@ public:
return properties.properties.limits.maxVertexInputBindings;
}
+ bool SupportsConditionalBarriers() const {
+ return supports_conditional_barriers;
+ }
+
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@@ -666,9 +688,11 @@ private:
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
+ bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
+ bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
index 4eb3913ee..47f6f2a03 100644
--- a/src/video_core/vulkan_common/vulkan_library.cpp
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -10,29 +10,35 @@
namespace Vulkan {
-Common::DynamicLibrary OpenLibrary() {
+std::shared_ptr<Common::DynamicLibrary> OpenLibrary(
+ [[maybe_unused]] Core::Frontend::GraphicsContext* context) {
LOG_DEBUG(Render_Vulkan, "Looking for a Vulkan library");
- Common::DynamicLibrary library;
+#if defined(ANDROID) && defined(ARCHITECTURE_arm64)
+ // Android manages its Vulkan driver from the frontend.
+ return context->GetDriverLibrary();
+#else
+ auto library = std::make_shared<Common::DynamicLibrary>();
#ifdef __APPLE__
// Check if a path to a specific Vulkan library has been specified.
char* const libvulkan_env = std::getenv("LIBVULKAN_PATH");
- if (!libvulkan_env || !library.Open(libvulkan_env)) {
+ if (!libvulkan_env || !library->Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
const auto filename =
Common::FS::GetBundleDirectory() / "Contents/Frameworks/libvulkan.dylib";
- void(library.Open(Common::FS::PathToUTF8String(filename).c_str()));
+ void(library->Open(Common::FS::PathToUTF8String(filename).c_str()));
}
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
LOG_DEBUG(Render_Vulkan, "Trying Vulkan library: {}", filename);
- if (!library.Open(filename.c_str())) {
+ if (!library->Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
LOG_DEBUG(Render_Vulkan, "Trying Vulkan library (second attempt): {}", filename);
- void(library.Open(filename.c_str()));
+ void(library->Open(filename.c_str()));
}
#endif
return library;
+#endif
}
} // namespace Vulkan
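Note: on desktop the loader is still resolved by name, trying the versioned filename first and then the plain one, while Android now receives the driver library from the frontend's GraphicsContext. A POSIX-only illustration of the name fallback using dlopen directly (yuzu wraps this in Common::DynamicLibrary):

#include <dlfcn.h>
#include <memory>

std::shared_ptr<void> OpenVulkanLoader() {
    for (const char* name : {"libvulkan.so.1", "libvulkan.so"}) {
        if (void* handle = dlopen(name, RTLD_NOW | RTLD_LOCAL)) {
            // Tie the handle's lifetime to the shared_ptr, mirroring the new
            // std::shared_ptr<Common::DynamicLibrary> return type above.
            return std::shared_ptr<void>(handle, &dlclose);
        }
    }
    return nullptr;  // No Vulkan loader found.
}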
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h
index 364ca979b..e1734525e 100644
--- a/src/video_core/vulkan_common/vulkan_library.h
+++ b/src/video_core/vulkan_common/vulkan_library.h
@@ -3,10 +3,14 @@
#pragma once
+#include <memory>
+
#include "common/dynamic_library.h"
+#include "core/frontend/graphics_context.h"
namespace Vulkan {
-Common::DynamicLibrary OpenLibrary();
+std::shared_ptr<Common::DynamicLibrary> OpenLibrary(
+ [[maybe_unused]] Core::Frontend::GraphicsContext* context = nullptr);
} // namespace Vulkan