diff options
Diffstat (limited to 'src/video_core')
47 files changed, 861 insertions, 301 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 308d013d6..bf6439530 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -133,8 +133,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp renderer_opengl/gl_state_tracker.h - renderer_opengl/gl_stream_buffer.cpp - renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_staging_buffer_pool.cpp + renderer_opengl/gl_staging_buffer_pool.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h renderer_opengl/gl_texture_cache_base.cpp @@ -281,7 +281,7 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PUBLIC glad shader_recompiler stb) -if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) +if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) add_dependencies(video_core ffmpeg-build) endif() @@ -345,3 +345,7 @@ endif() if (YUZU_ENABLE_LTO) set_property(TARGET video_core PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif() + +if (ANDROID AND ARCHITECTURE_arm64) + target_link_libraries(video_core PRIVATE adrenotools) +endif() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f1ad5f7cb..251a4a880 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { if (committed_ranges.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); } return; @@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { committed_ranges.clear(); if (downloads.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); } return; @@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() { const u32 size = channel_state->index_buffer.size; const auto& 
draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(size); std::array<BufferCopy, 1> copies{ {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; @@ -717,20 +715,38 @@ void BufferCache<P>::BindHostIndexBuffer() { template <class P> void BufferCache<P>::BindHostVertexBuffers() { + HostBindings host_bindings; + bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = channel_state->vertex_buffers[index]; - Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; } - flags[Dirty::VertexBuffer0 + index] = false; + host_bindings.min_index = std::min(host_bindings.min_index, index); + host_bindings.max_index = std::max(host_bindings.max_index, index); + any_valid = true; + } - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; - const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); + if (any_valid) { + host_bindings.max_index++; + for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) { + flags[Dirty::VertexBuffer0 + index] = false; + + const Binding& binding = channel_state->vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + + TouchBuffer(buffer, binding.buffer_id); + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); + + const u32 stride = maxwell3d->regs.vertex_streams[index].stride; + const u32 offset = buffer.Offset(binding.cpu_addr); + + host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer)); + host_bindings.offsets.push_back(offset); + 
host_bindings.sizes.push_back(binding.size); + host_bindings.strides.push_back(stride); + } + runtime.BindVertexBuffers(host_bindings); } } @@ -884,15 +900,25 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { if (maxwell3d->regs.transform_feedback_enabled == 0) { return; } + HostBindings host_bindings; for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = channel_state->transform_feedback_buffers[index]; + if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 && + maxwell3d->regs.transform_feedback.controls[index].stride == 0) { + break; + } Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - runtime.BindTransformFeedbackBuffer(index, buffer, offset, size); + host_bindings.buffers.push_back(reinterpret_cast<void*>(&buffer)); + host_bindings.offsets.push_back(offset); + host_bindings.sizes.push_back(binding.size); + } + if (host_bindings.buffers.size() > 0) { + runtime.BindTransformFeedbackBuffers(host_bindings); } } @@ -1462,7 +1488,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, template <class P> void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span<BufferCopy> copies) { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { MappedUploadMemory(buffer, total_size_bytes, copies); } else { ImmediateUploadMemory(buffer, largest_copy, copies); @@ -1473,7 +1499,7 @@ template <class P> void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, [[maybe_unused]] u64 largest_copy, [[maybe_unused]] std::span<const BufferCopy> copies) { - if constexpr (!USE_MEMORY_MAPS) { + if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) { std::span<u8> immediate_buffer; for (const BufferCopy& copy : copies) { std::span<const 
u8> upload_span; @@ -1532,7 +1558,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, auto& buffer = slot_buffers[buffer_id]; SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(copy_size); std::array copies{BufferCopy{ .src_offset = upload_staging.offset, @@ -1618,6 +1644,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si template <class P> void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { + bool dirty_index{false}; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> dirty_vertex_buffers; const auto scalar_replace = [buffer_id](Binding& binding) { if (binding.buffer_id == buffer_id) { binding.buffer_id = BufferId{}; @@ -1626,8 +1654,19 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { const auto replace = [scalar_replace](std::span<Binding> bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(channel_state->index_buffer); - replace(channel_state->vertex_buffers); + + if (channel_state->index_buffer.buffer_id == buffer_id) { + channel_state->index_buffer.buffer_id = BufferId{}; + dirty_index = true; + } + + for (u32 index = 0; index < channel_state->vertex_buffers.size(); index++) { + auto& binding = channel_state->vertex_buffers[index]; + if (binding.buffer_id == buffer_id) { + binding.buffer_id = BufferId{}; + dirty_vertex_buffers.push_back(index); + } + } std::ranges::for_each(channel_state->uniform_buffers, replace); std::ranges::for_each(channel_state->storage_buffers, replace); replace(channel_state->transform_feedback_buffers); @@ -1644,20 +1683,21 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); slot_buffers.erase(buffer_id); - NotifyBufferDeletion(); -} - -template <class 
P> -void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { channel_state->dirty_uniform_buffers.fill(~u32{0}); channel_state->uniform_buffer_binding_sizes.fill({}); } + auto& flags = maxwell3d->dirty.flags; - flags[Dirty::IndexBuffer] = true; - flags[Dirty::VertexBuffers] = true; - for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - flags[Dirty::VertexBuffer0 + index] = true; + if (dirty_index) { + flags[Dirty::IndexBuffer] = true; + } + + if (dirty_vertex_buffers.size() > 0) { + flags[Dirty::VertexBuffers] = true; + for (auto index : dirty_vertex_buffers) { + flags[Dirty::VertexBuffer0 + index] = true; + } } channel_state->has_deleted_buffers = true; } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c689fe06b..cf359e241 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -105,6 +105,15 @@ static constexpr Binding NULL_BINDING{ .buffer_id = NULL_BUFFER_ID, }; +struct HostBindings { + boost::container::small_vector<void*, NUM_VERTEX_BUFFERS> buffers; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> offsets; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> sizes; + boost::container::small_vector<u64, NUM_VERTEX_BUFFERS> strides; + u32 min_index{NUM_VERTEX_BUFFERS}; + u32 max_index{0}; +}; + class BufferCacheChannelInfo : public ChannelInfo { public: BufferCacheChannelInfo() = delete; @@ -173,6 +182,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; static 
constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; @@ -518,8 +528,6 @@ private: void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); - void NotifyBufferDeletion(); - [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, bool is_written) const; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2f986097f..62d70e9f3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -593,6 +593,12 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { +#if ANDROID + if (!Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + return; + } +#endif switch (regs.clear_report_value) { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(QueryType::SamplesPassed); @@ -614,6 +620,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() { case Regs::ReportSemaphore::Report::Payload: return regs.report_semaphore.payload; case Regs::ReportSemaphore::Report::ZPassPixelCount64: +#if ANDROID + if (!Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + return 120; + } +#endif // Deferred. 
rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, system.GPU().GetTicks()); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 295a416a8..456f733cf 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -14,6 +14,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/perf_stats.h" #include "video_core/cdma_pusher.h" diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3c5317777..889144f38 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -7,7 +7,7 @@ #include "common/settings.h" #include "common/thread.h" #include "core/core.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/control/scheduler.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index e8761a747..2d3f58201 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,6 +5,7 @@ #include "common/logging/log.h" #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/renderer_base.h" namespace VideoCore { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 8d20cbece..3e12a8813 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -9,7 +9,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/framebuffer_layout.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" @@ -89,6 +89,9 @@ public: void RequestScreenshot(void* data, std::function<void(bool)> callback, const Layout::FramebufferLayout& layout); + /// This is called to notify the rendering backend of a surface 
change + virtual void NotifySurfaceChanged() {} + protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<Core::Frontend::GraphicsContext> context; diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index e2a189b63..be92cc2f4 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/renderer_null/renderer_null.h" namespace Null { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6d3bda192..0cc546a3a 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { return views.back().texture.handle; } -BufferCacheRuntime::BufferCacheRuntime(const Device& device_) - : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, +BufferCacheRuntime::BufferCacheRuntime(const Device& device_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, staging_buffer_pool{staging_buffer_pool_}, + has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? 
std::nullopt : std::make_optional<StreamBuffer>()} { @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) }(); } +StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); +} + +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); +} + u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { if (device.CanReportMemoryUsage()) { return device_access_memory - device.GetCurrentDedicatedVideoMemory(); @@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { return 2_GiB; } -void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies) { +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + if (barrier) { + PreCopyBarrier(); + } for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData( - src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), - static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); + glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset), + static_cast<GLintptr>(copy.dst_offset), + static_cast<GLsizeiptr>(copy.size)); } + if (barrier) { + PostCopyBarrier(); + } +} + +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> 
copies) { + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); +} + +void BufferCacheRuntime::PreCopyBarrier() { + // TODO: finer grained barrier? + glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +void BufferCacheRuntime::PostCopyBarrier() { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); +} + +void BufferCacheRuntime::Finish() { + glFinish(); } void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { @@ -188,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); index++) { + BindVertexBuffer( + bindings.min_index + index, *reinterpret_cast<Buffer*>(bindings.buffers[index]), + static_cast<u32>(bindings.offsets[index]), static_cast<u32>(bindings.sizes[index]), + static_cast<u32>(bindings.strides[index])); + } +} + void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { @@ -276,6 +329,15 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); index++) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, + reinterpret_cast<Buffer*>(bindings.buffers[index])->Handle(), + static_cast<GLintptr>(bindings.offsets[index]), + static_cast<GLsizeiptr>(bindings.sizes[index])); + } +} + void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { *texture_handles++ = buffer.View(offset, size, format); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 18d3c3ac0..e4e000284 100644 --- 
a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,12 +7,12 @@ #include <span> #include "common/common_types.h" -#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" namespace OpenGL { @@ -60,16 +60,34 @@ class BufferCacheRuntime { public: static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); - explicit BufferCacheRuntime(const Device& device_); + explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); + + [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); + + [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + + void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, std::span<const VideoCommon::BufferCopy> copies); + void PreCopyBarrier(); + void PostCopyBarrier(); + void Finish(); + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); @@ 
-82,6 +100,7 @@ public: bool is_written); void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format); @@ -169,6 +188,7 @@ private: }; const Device& device; + StagingBufferPool& staging_buffer_pool; bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; @@ -201,7 +221,7 @@ private: struct BufferCacheParams { using Runtime = OpenGL::BufferCacheRuntime; using Buffer = OpenGL::Buffer; - using Async_Buffer = u32; + using Async_Buffer = OpenGL::StagingBufferMap; using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; static constexpr bool IS_OPENGL = true; @@ -209,9 +229,12 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + + // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 400c21981..03d234f2f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)) && !strict_context_required; use_driver_cache = is_nvidia; + 
supports_conditional_barriers = !is_intel; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index cc0b95f1a..ad27264e5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -188,6 +188,10 @@ public: return strict_context_required; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -233,6 +237,7 @@ private: bool has_bool_ref_bug{}; bool can_report_memory{}; bool strict_context_required{}; + bool supports_conditional_barriers{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f5baa0f3c..fc711c44a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra StateTracker& state_tracker_) : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache_runtime(device, program_manager, state_tracker), - texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), + texture_cache_runtime(device, program_manager, 
state_tracker, staging_buffer_pool), + texture_cache(texture_cache_runtime, *this), + buffer_cache_runtime(device, staging_buffer_pool), buffer_cache(*this, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 410d8ffc5..a73ad15c1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -230,6 +230,7 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + StagingBufferPool staging_buffer_pool; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ecda2984..3f077311e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ + .support_float64 = true, .support_float16 = false, .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_conditional_barrier = device.SupportsConditionalBarriers(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h index ca2bd8e8e..207a75d42 100644 --- a/src/video_core/renderer_opengl/gl_shader_context.h +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -4,6 +4,7 @@ #pragma 
once #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100644 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <array> +#include <memory> +#include <span> + +#include <glad/glad.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" + +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +StagingBufferMap::~StagingBufferMap() { + if (sync) { + sync->Create(); + } +} + +StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +StagingBuffers::~StagingBuffers() = default; + +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { + MICROPROFILE_SCOPE(OpenGL_BufferRequest); + + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + sync_indices[index] = insert_fence ? 
++current_sync_index : 0; + return StagingBufferMap{ + .mapped_span = std::span(maps[index], requested_size), + .sync = sync, + .buffer = buffers[index].handle, + }; +} + +size_t StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional<size_t> index = FindBuffer(requested_size); index) { + return *index; + } + + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + const auto next_pow2_size = Common::NextPow2(requested_size); + glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + syncs.emplace_back(); + sync_indices.emplace_back(); + sizes.push_back(next_pow2_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { + size_t known_unsignaled_index = current_sync_index + 1; + size_t smallest_buffer = std::numeric_limits<size_t>::max(); + std::optional<size_t> found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + if (sync_indices[index] >= known_unsignaled_index) { + // This fence is later than a fence that is known to not be signaled + continue; + } + if (!syncs[index].IsSignaled()) { + // Since this fence hasn't been signaled, it's safe to assume all later + // fences haven't been signaled either + known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; + } + return found; +} + +StreamBuffer::StreamBuffer() { + static constexpr GLenum flags = 
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + buffer.Create(); + glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); + glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); + mapped_pointer = + static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); + for (OGLSync& sync : fences) { + sync.Create(); + } +} + +std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { + ASSERT(size < REGION_SIZE); + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + fences[region].Create(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + if (iterator + size >= free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + fences[region].Create(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return {std::span(mapped_pointer + offset, size), offset}; +} + +StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} + +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 8fe927aaf..60f72d3a0 
100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -4,8 +4,10 @@ #pragma once #include <array> +#include <optional> #include <span> #include <utility> +#include <vector> #include <glad/glad.h> @@ -17,6 +19,35 @@ namespace OpenGL { using namespace Common::Literals; +struct StagingBufferMap { + ~StagingBufferMap(); + + std::span<u8> mapped_span; + size_t offset = 0; + OGLSync* sync; + GLuint buffer; +}; + +struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); + + StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + + size_t RequestBuffer(size_t requested_size); + + std::optional<size_t> FindBuffer(size_t requested_size); + + std::vector<OGLSync> syncs; + std::vector<OGLBuffer> buffers; + std::vector<u8*> maps; + std::vector<size_t> sizes; + std::vector<size_t> sync_indices; + GLenum storage_flags; + GLenum map_flags; + size_t current_sync_index = 0; +}; + class StreamBuffer { static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; static constexpr size_t NUM_SYNCS = 16; @@ -48,4 +79,17 @@ private: std::array<OGLSync, NUM_SYNCS> fences; }; +class StagingBufferPool { +public: + StagingBufferPool() = default; + ~StagingBufferPool() = default; + + StagingBufferMap RequestUploadBuffer(size_t size); + StagingBufferMap RequestDownloadBuffer(size_t size); + +private: + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp deleted file mode 100644 index 2005c8993..000000000 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// 
SPDX-License-Identifier: GPL-2.0-or-later - -#include <array> -#include <memory> -#include <span> - -#include <glad/glad.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" - -namespace OpenGL { - -StreamBuffer::StreamBuffer() { - static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; - buffer.Create(); - glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); - glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); - mapped_pointer = - static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); - for (OGLSync& sync : fences) { - sync.Create(); - } -} - -std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { - ASSERT(size < REGION_SIZE); - for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; - ++region) { - fences[region].Create(); - } - used_iterator = iterator; - - for (size_t region = Region(free_iterator) + 1, - region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); - region < region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - if (iterator + size >= free_iterator) { - free_iterator = iterator + size; - } - if (iterator + size > STREAM_BUFFER_SIZE) { - for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { - fences[region].Create(); - } - used_iterator = 0; - iterator = 0; - free_iterator = size; - - for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - } - const size_t offset = iterator; - iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); - return {std::span(mapped_pointer + offset, size), offset}; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp 
b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 56d0ff869..1c5dbcdd8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; } } - } // Anonymous namespace -ImageBufferMap::~ImageBufferMap() { - if (sync) { - sync->Create(); - } -} - TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, - StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), - format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { + StateTracker& state_tracker_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, + util_shaders(program_manager), format_conversion_pass{util_shaders}, + resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() { glFinish(); } -ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { - return upload_buffers.RequestMap(size, true); +StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); } -ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return download_buffers.RequestMap(size, false); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); } u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { @@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, is_linear ? 
GL_LINEAR : GL_NEAREST); } -void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { switch (image.info.type) { case ImageType::e2D: @@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept { return device.HasASTC(); } -TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) - : storage_flags{storage_flags_}, map_flags{map_flags_} {} - -TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - -ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, - bool insert_fence) { - const size_t index = RequestBuffer(requested_size); - OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - return ImageBufferMap{ - .mapped_span = std::span(maps[index], requested_size), - .sync = sync, - .buffer = buffers[index].handle, - }; -} - -size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { - if (const std::optional<size_t> index = FindBuffer(requested_size); index) { - return *index; - } - - OGLBuffer& buffer = buffers.emplace_back(); - buffer.Create(); - glNamedBufferStorage(buffer.handle, requested_size, nullptr, - storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, - map_flags | GL_MAP_PERSISTENT_BIT))); - - syncs.emplace_back(); - sizes.push_back(requested_size); - - ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && - maps.size() == sizes.size()); - - return buffers.size() - 1; -} - -std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { - size_t smallest_buffer = std::numeric_limits<size_t>::max(); - std::optional<size_t> found; - const size_t num_buffers = sizes.size(); - for (size_t index = 0; index < num_buffers; ++index) { - const 
size_t buffer_size = sizes[index]; - if (buffer_size < requested_size || buffer_size >= smallest_buffer) { - continue; - } - if (syncs[index].handle != 0) { - if (!syncs[index].IsSignaled()) { - continue; - } - syncs[index].Release(); - } - smallest_buffer = buffer_size; - found = index; - } - return found; -} - Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { @@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, } } -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { UploadMemory(map.buffer, map.offset, copies); } @@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b } } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::DownloadMemory(StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { DownloadMemory(map.buffer, map.offset, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e9b3302b..1148b73d7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -11,6 +11,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -37,15 +38,6 @@ using VideoCommon::Region2D; using VideoCommon::RenderTargets; using VideoCommon::SlotVector; -struct ImageBufferMap { - ~ImageBufferMap(); - - std::span<u8> mapped_span; - size_t offset = 0; - 
OGLSync* sync; - GLuint buffer; -}; - struct FormatProperties { GLenum compatibility_class; bool compatibility_by_size; @@ -74,14 +66,15 @@ class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, - StateTracker& state_tracker); + StateTracker& state_tracker, + StagingBufferPool& staging_buffer_pool); ~TextureCacheRuntime(); void Finish(); - ImageBufferMap UploadStagingBuffer(size_t size); + StagingBufferMap UploadStagingBuffer(size_t size); - ImageBufferMap DownloadStagingBuffer(size_t size); + StagingBufferMap DownloadStagingBuffer(size_t size); u64 GetDeviceLocalMemory() const { return device_access_memory; @@ -120,7 +113,7 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, + void AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void InsertUploadMemoryBarrier(); @@ -149,35 +142,16 @@ public: } private: - struct StagingBuffers { - explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); - ~StagingBuffers(); - - ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - - size_t RequestBuffer(size_t requested_size); - - std::optional<size_t> FindBuffer(size_t requested_size); - - std::vector<OGLSync> syncs; - std::vector<OGLBuffer> buffers; - std::vector<u8*> maps; - std::vector<size_t> sizes; - GLenum storage_flags; - GLenum map_flags; - }; - const Device& device; StateTracker& state_tracker; + StagingBufferPool& staging_buffer_pool; + UtilShaders util_shaders; FormatConversionPass format_conversion_pass; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; bool has_broken_texture_view_formats = false; - StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; - StagingBuffers 
download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; - OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; @@ -213,7 +187,7 @@ public: void UploadMemory(GLuint buffer_handle, size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, + void UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, @@ -222,7 +196,8 @@ public: void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(StagingBufferMap& map, + std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2c7ac210b..544982d18 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -19,6 +19,7 @@ #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, +void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint 
BINDING_OUTPUT_IMAGE = 0; @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, +void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 9013808e7..feecd404c 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -16,23 +16,23 @@ namespace OpenGL { class Image; class ProgramManager; -struct ImageBufferMap; +struct StagingBufferMap; class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void ASTCDecode(Image& image, const ImageBufferMap& map, + void ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload2D(Image& image, 
const ImageBufferMap& map, + void BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, + void PitchUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index b75d7220d..9a0b10568 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -347,6 +347,14 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { + if (device.MustEmulateScaledFormats()) { + if (type == Maxwell::VertexAttribute::Type::SScaled) { + type = Maxwell::VertexAttribute::Type::SInt; + } else if (type == Maxwell::VertexAttribute::Type::UScaled) { + type = Maxwell::VertexAttribute::Type::UInt; + } + } + const VkFormat format{([&]() { switch (type) { case Maxwell::VertexAttribute::Type::UnusedEnumDoNotUseBecauseItWillGoAway: diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 8e31eba34..77128c6e2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -16,7 +16,7 @@ #include "common/settings.h" #include "common/telemetry.h" #include "core/core_timing.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" #include 
"video_core/renderer_vulkan/renderer_vulkan.h" @@ -84,8 +84,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) try : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), - instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), + instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window.GetWindowInfo())), @@ -93,7 +93,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, state_tracker(), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), - present_manager(render_window, device, memory_allocator, scheduler, swapchain), + present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, + surface), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, scheduler, screen_info), rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f44367cb2..b2e8cbd1b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -54,6 +54,10 @@ public: return device.GetDriverName(); } + void NotifySurfaceChanged() override { + present_manager.NotifySurfaceChanged(); + } + private: void Report() const; @@ -63,7 +67,7 @@ 
private: Core::Memory::Memory& cpu_memory; Tegra::GPU& gpu; - Common::DynamicLibrary library; + std::shared_ptr<Common::DynamicLibrary> library; vk::InstanceDispatch dld; vk::Instance instance; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 1e0fdd3d9..acb143fc7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -74,7 +74,7 @@ struct ScreenRectVertex { } }; -constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { +std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { // clang-format off return { 2.f / width, 0.f, 0.f, 0.f, 0.f, 2.f / height, 0.f, 0.f, @@ -441,7 +441,12 @@ void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& f if (const std::size_t swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count || current_srgb != is_srgb) { current_srgb = is_srgb; +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = current_srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif image_count = swapchain_images; Recreate(); } @@ -1107,7 +1112,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { .pNext = nullptr, .flags = 0, .imageType = VK_IMAGE_TYPE_2D, - .format = GetFormat(framebuffer), + .format = used_on_framebuffer ? 
VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer), .extent = { .width = (up_scale * framebuffer.width) >> down_shift, @@ -1128,14 +1133,14 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { const auto create_commit = [&](vk::Image& image) { return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); }; - const auto create_image_view = [&](vk::Image& image) { + const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = *image, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = GetFormat(framebuffer), + .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : GetFormat(framebuffer), .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -1165,7 +1170,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { const u32 down_shift = Settings::values.resolution_info.down_shift; aa_image = create_image(true, up_scale, down_shift); aa_commit = create_commit(aa_image); - aa_image_view = create_image_view(aa_image); + aa_image_view = create_image_view(aa_image, true); VkExtent2D size{ .width = (up_scale * framebuffer.width) >> down_shift, .height = (up_scale * framebuffer.height) >> down_shift, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 9627eb129..d72d99899 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,7 +7,6 @@ #include <span> #include <vector> -#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -303,9 +302,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m 
DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, - uint8_pass(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue), quad_index_pass(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue) { + if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers. + uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool, + compute_pass_descriptor_queue); + } quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, scheduler_, staging_pool_); quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, @@ -442,7 +445,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat topology == PrimitiveTopology::QuadStrip); } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { vk_index_type = VK_INDEX_TYPE_UINT16; - std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); + if (uint8_pass) { + std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset); + } } if (vk_buffer == VK_NULL_HANDLE) { // Vulkan doesn't support null index buffers. Replace it with our own null buffer. 
@@ -496,6 +501,40 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { + boost::container::small_vector<VkBuffer, 32> buffer_handles; + for (u32 index = 0; index < bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]); + auto handle = buffer.Handle(); + if (handle == VK_NULL_HANDLE) { + bindings.offsets[index] = 0; + bindings.sizes[index] = VK_WHOLE_SIZE; + if (!device.HasNullDescriptor()) { + ReserveNullBuffer(); + handle = *null_buffer; + } + } + buffer_handles.push_back(handle); + } + if (device.IsExtExtendedDynamicStateSupported()) { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.strides.data())); + }); + } else { + scheduler.Record([bindings = bindings, + buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers( + bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data())); + }); + } +} + void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size) { if (!device.IsExtTransformFeedbackSupported()) { @@ -517,6 +556,25 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, }); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { + if (!device.IsExtTransformFeedbackSupported()) { + // Already logged in the rasterizer + return; + } + boost::container::small_vector<VkBuffer, 4> buffer_handles; + for (u32 index = 0; index < 
bindings.buffers.size(); index++) { + auto& buffer = *reinterpret_cast<Buffer*>(bindings.buffers[index]); + buffer_handles.push_back(buffer.Handle()); + } + scheduler.Record( + [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT( + 0, static_cast<u32>(buffer_handles.size()), buffer_handles.data(), + reinterpret_cast<const VkDeviceSize*>(bindings.offsets.data()), + reinterpret_cast<const VkDeviceSize*>(bindings.sizes.data())); + }); +} + void BufferCacheRuntime::ReserveNullBuffer() { if (null_buffer) { return; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 5e9602905..92d3e9f32 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -18,6 +18,7 @@ namespace Vulkan { class Device; class DescriptorPool; class Scheduler; +struct HostVertexBinding; class BufferCacheRuntime; @@ -96,8 +97,10 @@ public: void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count); void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings& bindings); void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, [[maybe_unused]] u32 binding_index, u32 size) { @@ -139,7 +142,7 @@ private: vk::Buffer null_buffer; MemoryCommit null_buffer_commit; - Uint8Pass uint8_pass; + std::unique_ptr<Uint8Pass> uint8_pass; QuadIndexedPass quad_index_pass; }; @@ -157,6 +160,7 @@ struct BufferCacheParams { static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; }; using BufferCache = 
VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 66dfe5733..5734f51e5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,14 +114,16 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut return Shader::AttributeType::Disabled; case Maxwell::VertexAttribute::Type::SNorm: case Maxwell::VertexAttribute::Type::UNorm: - case Maxwell::VertexAttribute::Type::UScaled: - case Maxwell::VertexAttribute::Type::SScaled: case Maxwell::VertexAttribute::Type::Float: return Shader::AttributeType::Float; case Maxwell::VertexAttribute::Type::SInt: return Shader::AttributeType::SignedInt; case Maxwell::VertexAttribute::Type::UInt: return Shader::AttributeType::UnsignedInt; + case Maxwell::VertexAttribute::Type::UScaled: + return Shader::AttributeType::UnsignedScaled; + case Maxwell::VertexAttribute::Type::SScaled: + return Shader::AttributeType::SignedScaled; } return Shader::AttributeType::Float; } @@ -286,14 +288,17 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), + workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY + ? 
1 + : (std::max(std::thread::hardware_concurrency(), 2U) - 1), + "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; profile = Shader::Profile{ .supported_spirv = device.SupportedSpirvVersion(), .unified_descriptor_binding = true, - .support_descriptor_aliasing = true, + .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(), .support_int8 = device.IsInt8Supported(), .support_int16 = device.IsShaderInt16Supported(), .support_int64 = device.IsShaderInt64Supported(), @@ -324,6 +329,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_derivative_control = true, .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(), + .support_scaled_attributes = !device.MustEmulateScaledFormats(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -341,8 +347,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_signed_operations = false, .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, .ignore_nan_fp_comparisons = false, - }; + .has_broken_spirv_subgroup_mask_vector_extract_dynamic = + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; host_info = Shader::HostTranslateInfo{ + .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), .needs_demote_reorder = diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index c49583013..10ace0420 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -4,10 +4,12 @@ #include "common/microprofile.h" #include 
"common/settings.h" #include "common/thread.h" +#include "core/frontend/emu_window.h" #include "video_core/renderer_vulkan/vk_present_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_surface.h" namespace Vulkan { @@ -92,14 +94,17 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat form } // Anonymous namespace -PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_, +PresentManager::PresentManager(const vk::Instance& instance_, + Core::Frontend::EmuWindow& render_window_, const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, - Swapchain& swapchain_) - : render_window{render_window_}, device{device_}, + Swapchain& swapchain_, vk::SurfaceKHR& surface_) + : instance{instance_}, render_window{render_window_}, device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_}, - blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, + surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), + swapchain.GetImageViewFormat())}, use_present_thread{Settings::values.async_presentation.GetValue()}, - image_count{swapchain.GetImageCount()} { + image_count{swapchain.GetImageCount()}, last_render_surface{ + render_window_.GetWindowInfo().render_surface} { auto& dld = device.GetLogical(); cmdpool = dld.CreateCommandPool({ @@ -286,14 +291,45 @@ void PresentManager::PresentThread(std::stop_token token) { } } +void PresentManager::NotifySurfaceChanged() { +#ifdef ANDROID + std::scoped_lock lock{recreate_surface_mutex}; + recreate_surface_cv.notify_one(); +#endif +} + void PresentManager::CopyToSwapchain(Frame* frame) { MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); const auto recreate_swapchain = [&] { - swapchain.Create(frame->width, 
frame->height, frame->is_srgb); + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); image_count = swapchain.GetImageCount(); }; +#ifdef ANDROID + std::unique_lock lock{recreate_surface_mutex}; + + const auto needs_recreation = [&] { + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + return true; + } + if (swapchain.NeedsRecreation(frame->is_srgb)) { + return true; + } + return false; + }; + + recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), + [&]() { return !needs_recreation(); }); + + // If the frontend recreated the surface, recreate the renderer surface and swapchain. + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + last_render_surface = render_window.GetWindowInfo().render_surface; + surface = CreateSurface(instance, render_window.GetWindowInfo()); + recreate_swapchain(); + } +#endif + // If the size or colorspace of the incoming frames has changed, recreate the swapchain // to account for that. const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb); @@ -436,7 +472,7 @@ void PresentManager::CopyToSwapchain(Frame* frame) { // Submit the image copy/blit to the swapchain { - std::scoped_lock lock{scheduler.submit_mutex}; + std::scoped_lock submit_lock{scheduler.submit_mutex}; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) { case VK_SUCCESS: @@ -454,4 +490,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) { swapchain.Present(render_semaphore); } -} // namespace Vulkan +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 420a775e2..4ac2e2395 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -37,8 +37,9 @@ struct Frame { class PresentManager { public: - PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device, - MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain); + PresentManager(const vk::Instance& instance, Core::Frontend::EmuWindow& render_window, + const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, + Swapchain& swapchain, vk::SurfaceKHR& surface); ~PresentManager(); /// Returns the last used presentation frame @@ -54,30 +55,38 @@ public: /// Waits for the present thread to finish presenting all queued frames. void WaitPresent(); + /// This is called to notify the rendering backend of a surface change + void NotifySurfaceChanged(); + private: void PresentThread(std::stop_token token); void CopyToSwapchain(Frame* frame); private: + const vk::Instance& instance; Core::Frontend::EmuWindow& render_window; const Device& device; MemoryAllocator& memory_allocator; Scheduler& scheduler; Swapchain& swapchain; + vk::SurfaceKHR& surface; vk::CommandPool cmdpool; std::vector<Frame> frames; std::queue<Frame*> present_queue; std::queue<Frame*> free_queue; std::condition_variable_any frame_cv; std::condition_variable free_cv; + std::condition_variable recreate_surface_cv; std::mutex swapchain_mutex; + std::mutex recreate_surface_mutex; std::mutex queue_mutex; std::mutex free_mutex; std::jthread present_thread; bool blit_supported; bool use_present_thread; - std::size_t image_count; + std::size_t image_count{}; + void* last_render_surface{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 
8d3a9736b..84e3a30cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -188,7 +188,14 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { FlushWork(); gpu_memory->FlushCaching(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; if (!pipeline) { @@ -272,7 +279,14 @@ void RasterizerVulkan::DrawTexture() { SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -743,7 +757,11 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load } void RasterizerVulkan::FlushWork() { +#ifdef ANDROID + static constexpr u32 DRAWS_TO_DISPATCH = 1024; +#else static constexpr u32 DRAWS_TO_DISPATCH = 4096; +#endif // ANDROID // Only check multiples of 8 draws static_assert(DRAWS_TO_DISPATCH % 8 == 0); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 80455ec08..17ef61147 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -239,7 +239,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se void Scheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. 
+ query_cache->UpdateCounters(); + } +#else query_cache->UpdateCounters(); +#endif } } @@ -250,7 +257,14 @@ void Scheduler::InvalidateState() { } void Scheduler::EndPendingOperations() { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache->DisableStreams(); + } +#else query_cache->DisableStreams(); +#endif EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 8c0dec590..d3cddac69 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -107,16 +107,17 @@ VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& cap Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width_, u32 height_, bool srgb) : surface{surface_}, device{device_}, scheduler{scheduler_} { - Create(width_, height_, srgb); + Create(surface_, width_, height_, srgb); } Swapchain::~Swapchain() = default; -void Swapchain::Create(u32 width_, u32 height_, bool srgb) { +void Swapchain::Create(VkSurfaceKHR surface_, u32 width_, u32 height_, bool srgb) { is_outdated = false; is_suboptimal = false; width = width_; height = height_; + surface = surface_; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -230,7 +231,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, +#ifdef ANDROID + // On Android, do not allow surface rotation to deviate from the frontend. 
+ .preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, +#else .preTransform = capabilities.currentTransform, +#endif .compositeAlpha = alpha_flags, .presentMode = present_mode, .clipped = VK_FALSE, @@ -266,7 +272,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif } void Swapchain::CreateSemaphores() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index bf1ea7254..b8a1465a6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -24,7 +24,7 @@ public: ~Swapchain(); /// Creates (or recreates) the swapchain with a given size. - void Create(u32 width, u32 height, bool srgb); + void Create(VkSurfaceKHR surface, u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. 
bool AcquireNextImage(); @@ -118,7 +118,7 @@ private: bool NeedsPresentModeUpdate() const; - const VkSurfaceKHR surface; + VkSurfaceKHR surface; const Device& device; Scheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index db04943eb..a802d3c49 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +#include <adrenotools/driver.h> +#endif + #include "common/literals.h" #include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -13,7 +17,10 @@ namespace Vulkan { using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) - : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { +#ifndef ANDROID + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} +#endif +{ { std::scoped_lock lk{m_submission_lock}; m_submission_time = std::chrono::steady_clock::now(); @@ -30,6 +37,7 @@ void TurboMode::QueueSubmitted() { } void TurboMode::Run(std::stop_token stop_token) { +#ifndef ANDROID auto& dld = m_device.GetLogical(); // Allocate buffer. 2MiB should be sufficient. @@ -142,8 +150,14 @@ void TurboMode::Run(std::stop_token stop_token) { // Create a single command buffer. auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; +#endif while (!stop_token.stop_requested()) { +#ifdef ANDROID +#ifdef ARCHITECTURE_arm64 + adrenotools_set_turbo(true); +#endif +#else // Reset the fence. fence.Reset(); @@ -209,7 +223,7 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. 
fence.Wait(); - +#endif // Wait for the next graphics queue submission if necessary. std::unique_lock lk{m_submission_lock}; Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { @@ -217,6 +231,9 @@ void TurboMode::Run(std::stop_token stop_token) { std::chrono::milliseconds{100}; }); } +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) + adrenotools_set_turbo(false); +#endif } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 99b5ac50b..9341c9867 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -23,8 +23,10 @@ public: private: void Run(std::stop_token stop_token); +#ifndef ANDROID Device m_device; MemoryAllocator m_allocator; +#endif std::mutex m_submission_lock; std::condition_variable_any m_submission_cv; std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 310fb551a..e77b576ec 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class UpdateDescriptorQueue final { // This should be plenty for the vast majority of cases. Most desktop platforms only // provide up to 3 swapchain images. 
- static constexpr size_t FRAMES_IN_FLIGHT = 5; + static constexpr size_t FRAMES_IN_FLIGHT = 7; static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000; static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index e8ddde691..b72788c6d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -22,6 +22,9 @@ using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; +constexpr u32 RescaleHeightThreshold = 288; +constexpr u32 DownscaleHeightThreshold = 512; + ImageInfo::ImageInfo(const TICEntry& config) noexcept { forced_flushed = config.IsPitchLinear() && !Settings::values.use_reactive_flushing.GetValue(); dma_downloaded = forced_flushed; @@ -113,8 +116,9 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); rescaleable &= (block.depth == 0) && resources.levels == 1; - rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; - downscaleable = size.height > 512; + rescaleable &= size.height > RescaleHeightThreshold || + GetFormatType(format) != SurfaceType::ColorTexture; + downscaleable = size.height > DownscaleHeightThreshold; } } @@ -152,8 +156,8 @@ ImageInfo::ImageInfo(const Maxwell3D::Regs::RenderTargetConfig& ct, size.depth = ct.depth; } else { rescaleable = block.depth == 0; - rescaleable &= size.height > 256; - downscaleable = size.height > 512; + rescaleable &= size.height > RescaleHeightThreshold; + downscaleable = size.height > DownscaleHeightThreshold; type = ImageType::e2D; resources.layers = ct.depth; } @@ -232,8 +236,8 @@ ImageInfo::ImageInfo(const Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; - rescaleable = block.depth == 0 && size.height > 256; - downscaleable = 
size.height > 512; + rescaleable = block.depth == 0 && size.height > RescaleHeightThreshold; + downscaleable = size.height > DownscaleHeightThreshold; } } @@ -275,8 +279,8 @@ ImageInfo::ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept { resources.layers = 1; layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); - rescaleable = block.depth == 0 && size.height > 256; - downscaleable = size.height > 512; + rescaleable = block.depth == 0 && size.height > RescaleHeightThreshold; + downscaleable = size.height > DownscaleHeightThreshold; } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cf082c5d..c7f7448e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -850,15 +850,11 @@ void TextureCache<P>::PopAsyncFlushes() { template <class P> ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); - const ImageId dst_id = FindDMAImage(dst_info, operand.address); - if (!dst_id) { - return NULL_IMAGE_ID; - } - auto& image = slot_images[dst_id]; - if (False(image.flags & ImageFlagBits::GpuModified)) { - // No need to waste time on an image that's synced with guest + const ImageId image_id = FindDMAImage(dst_info, operand.address); + if (!image_id) { return NULL_IMAGE_ID; } + auto& image = slot_images[image_id]; if (!is_upload && !image.info.dma_downloaded) { // Force a full sync. 
image.info.dma_downloaded = true; @@ -868,7 +864,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo if (!base) { return NULL_IMAGE_ID; } - return dst_id; + return image_id; } template <class P> diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 10a001b8f..9de484c29 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -13,11 +13,39 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, [[maybe_unused]] void* user_data) { // Skip logging known false-positive validation errors switch (static_cast<u32>(data->messageIdNumber)) { +#ifdef ANDROID + case 0xbf9cf353u: // VUID-vkCmdBindVertexBuffers2-pBuffers-04111 + // The below are due to incorrect reporting of extendedDynamicState + case 0x1093bebbu: // VUID-vkCmdSetCullMode-None-03384 + case 0x9215850fu: // VUID-vkCmdSetDepthTestEnable-None-03352 + case 0x86bf18dcu: // VUID-vkCmdSetDepthWriteEnable-None-03354 + case 0x0792ad08u: // VUID-vkCmdSetStencilOp-None-03351 + case 0x93e1ba4eu: // VUID-vkCmdSetFrontFace-None-03383 + case 0xac9c13c5u: // VUID-vkCmdSetStencilTestEnable-None-03350 + case 0xc9a2001bu: // VUID-vkCmdSetDepthBoundsTestEnable-None-03349 + case 0x8b7159a7u: // VUID-vkCmdSetDepthCompareOp-None-03353 + // The below are due to incorrect reporting of extendedDynamicState2 + case 0xb13c8036u: // VUID-vkCmdSetDepthBiasEnable-None-04872 + case 0xdff2e5c1u: // VUID-vkCmdSetRasterizerDiscardEnable-None-04871 + case 0x0cc85f41u: // VUID-vkCmdSetPrimitiveRestartEnable-None-04866 + case 0x01257b492: // VUID-vkCmdSetLogicOpEXT-None-04867 + // The below are due to incorrect reporting of vertexInputDynamicState + case 0x398e0dabu: // VUID-vkCmdSetVertexInputEXT-None-04790 + // The below are due to incorrect reporting of extendedDynamicState3 + case 0x970c11a5u: //
VUID-vkCmdSetColorWriteMaskEXT-extendedDynamicState3ColorWriteMask-07364 + case 0x6b453f78u: // VUID-vkCmdSetColorBlendEnableEXT-extendedDynamicState3ColorBlendEnable-07355 + case 0xf66469d0u: // VUID-vkCmdSetColorBlendEquationEXT-extendedDynamicState3ColorBlendEquation-07356 + case 0x1d43405eu: // VUID-vkCmdSetLogicOpEnableEXT-extendedDynamicState3LogicOpEnable-07365 + case 0x638462e8u: // VUID-vkCmdSetDepthClampEnableEXT-extendedDynamicState3DepthClampEnable-07448 + // Misc + case 0xe0a2da61u: // VUID-vkCmdDrawIndexed-format-07753 +#else case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0]) case 0xe8616bf2u: // Bound VkDescriptorSet 0x0[] was destroyed. Likely push_descriptor related case 0x1608dec0u: // Image layout in vkUpdateDescriptorSet doesn't match descriptor use case 0x55362756u: // Descriptor binding and framebuffer attachment overlap +#endif return VK_FALSE; default: break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aea677cb3..a46f9beed 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -18,6 +18,10 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +#include <adrenotools/bcenabler.h> +#endif + namespace Vulkan { using namespace Common::Literals; namespace { @@ -262,6 +266,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) { + // These properties are extracted from Adreno driver 512.687.0 + constexpr VkFormatFeatureFlags tiling_features{ + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | 
VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; + + constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT}; + + static constexpr std::array bcn_formats{ + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, + }; + + for (const auto format : bcn_formats) { + format_properties[format].linearTilingFeatures = tiling_features; + format_properties[format].optimalTilingFeatures = tiling_features; + format_properties[format].bufferFeatures = buffer_features; + } +} +#endif + NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, const std::set<std::string, std::less<>>& exts) { if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { @@ -302,6 +332,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_suitable = GetSuitability(surface != nullptr); const VkDriverId driver_id = properties.driver.driverID; + const auto device_id = properties.properties.deviceID; const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; const bool is_amd_driver = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; @@ -310,9 +341,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY; const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; + const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + const bool 
is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP; + const bool is_s8gen2 = device_id == 0x43050a01; - if (is_mvk && !is_suitable) { - LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway"); + if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); } else if (!is_suitable) { throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } @@ -352,9 +386,64 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + supports_conditional_barriers = !(is_intel_anv || is_intel_windows); + CollectPhysicalMemoryInfo(); CollectToolingInfo(); +#ifdef ANDROID + if (is_qualcomm || is_turnip) { + LOG_WARNING(Render_Vulkan, + "Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color"); + extensions.custom_border_color = false; + loaded_extensions.erase(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + } + + if (is_qualcomm) { + must_emulate_scaled_formats = true; + + LOG_WARNING(Render_Vulkan, "Qualcomm drivers have broken VK_EXT_extended_dynamic_state"); + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + + LOG_WARNING(Render_Vulkan, + "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); + extensions.push_descriptor = false; + loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + +#ifdef ARCHITECTURE_arm64 + // Patch the driver to enable BCn textures. 
+ const auto major = (properties.properties.driverVersion >> 24) << 2; + const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU; + const auto vendor = properties.properties.vendorID; + const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor); + + if (patch_status == ADRENOTOOLS_BCN_PATCH) { + LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats"); + if (adrenotools_patch_bcn( + reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) { + OverrideBcnFormats(format_properties); + } else { + LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash"); + } + } else if (patch_status == ADRENOTOOLS_BCN_BLOB) { + LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches"); + } else { + LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures"); + } +#endif // ARCHITECTURE_arm64 + } + + const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; + if (is_arm) { + must_emulate_scaled_formats = true; + + LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state"); + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + } +#endif // ANDROID + if (is_nvidia) { const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); @@ -388,7 +477,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); } } - if (extensions.extended_dynamic_state2 && is_radv) { + if (extensions.extended_dynamic_state2 && (is_radv || is_qualcomm)) { const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { LOG_WARNING( @@ -415,7 +504,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR dynamic_state3_enables = false; } 
} - if (extensions.vertex_input_dynamic_state && is_radv) { + if (extensions.vertex_input_dynamic_state && (is_radv || is_qualcomm)) { + // Qualcomm S8gen2 drivers do not properly support vertex_input_dynamic_state. // TODO(ameerj): Blacklist only offending driver versions // TODO(ameerj): Confirm if RDNA1 is affected const bool is_rdna2 = @@ -467,8 +557,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); cant_blit_msaa = true; } - if (is_intel_anv) { - LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format"); + if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { + LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); must_emulate_bgr565 = true; } if (extensions.push_descriptor && is_intel_anv) { @@ -633,7 +723,8 @@ bool Device::ShouldBoostClocks() const { driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || - driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 5f1c63ff9..f314d0ffe 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -85,7 +85,6 @@ // Define extensions which must be supported. 
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ - EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ @@ -105,6 +104,7 @@ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ @@ -141,9 +141,6 @@ FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \ FEATURE_NAME(features, wideLines) \ FEATURE_NAME(host_query_reset, hostQueryReset) \ - FEATURE_NAME(robustness2, nullDescriptor) \ - FEATURE_NAME(robustness2, robustBufferAccess2) \ - FEATURE_NAME(robustness2, robustImageAccess2) \ FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ FEATURE_NAME(variable_pointer, variablePointers) \ @@ -156,6 +153,9 @@ FEATURE_NAME(index_type_uint8, indexTypeUint8) \ FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ FEATURE_NAME(provoking_vertex, provokingVertexLast) \ + FEATURE_NAME(robustness2, nullDescriptor) \ + FEATURE_NAME(robustness2, robustBufferAccess2) \ + FEATURE_NAME(robustness2, robustImageAccess2) \ FEATURE_NAME(shader_float16_int8, shaderFloat16) \ FEATURE_NAME(shader_float16_int8, shaderInt8) \ FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ @@ -295,6 +295,16 @@ public: return features.features.textureCompressionASTC_LDR; } + /// Returns true if descriptor aliasing is natively supported. 
+ bool IsDescriptorAliasingSupported() const { + return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + } + + /// Returns true if the device supports float64 natively. + bool IsFloat64Supported() const { + return features.features.shaderFloat64; + } + /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { return features.shader_float16_int8.shaderFloat16; @@ -495,6 +505,10 @@ public: } bool HasTimelineSemaphore() const { + if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + // Timeline semaphores do not work properly on all Qualcomm drivers. + return false; + } return features.timeline_semaphore.timelineSemaphore; } @@ -551,6 +565,10 @@ public: return cant_blit_msaa; } + bool MustEmulateScaledFormats() const { + return must_emulate_scaled_formats; + } + bool MustEmulateBGR565() const { return must_emulate_bgr565; } @@ -567,6 +585,10 @@ public: return properties.properties.limits.maxVertexInputBindings; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -666,9 +688,11 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. u64 device_access_memory{}; ///< Total size of device local memory in bytes. 
u32 sets_per_pool{}; ///< Sets per Description Pool diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp index 4eb3913ee..47f6f2a03 100644 --- a/src/video_core/vulkan_common/vulkan_library.cpp +++ b/src/video_core/vulkan_common/vulkan_library.cpp @@ -10,29 +10,35 @@ namespace Vulkan { -Common::DynamicLibrary OpenLibrary() { +std::shared_ptr<Common::DynamicLibrary> OpenLibrary( + [[maybe_unused]] Core::Frontend::GraphicsContext* context) { LOG_DEBUG(Render_Vulkan, "Looking for a Vulkan library"); - Common::DynamicLibrary library; +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) + // Android manages its Vulkan driver from the frontend. + return context->GetDriverLibrary(); +#else + auto library = std::make_shared<Common::DynamicLibrary>(); #ifdef __APPLE__ // Check if a path to a specific Vulkan library has been specified. char* const libvulkan_env = std::getenv("LIBVULKAN_PATH"); - if (!libvulkan_env || !library.Open(libvulkan_env)) { + if (!libvulkan_env || !library->Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. const auto filename = Common::FS::GetBundleDirectory() / "Contents/Frameworks/libvulkan.dylib"; - void(library.Open(Common::FS::PathToUTF8String(filename).c_str())); + void(library->Open(Common::FS::PathToUTF8String(filename).c_str())); } #else std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); LOG_DEBUG(Render_Vulkan, "Trying Vulkan library: {}", filename); - if (!library.Open(filename.c_str())) { + if (!library->Open(filename.c_str())) { // Android devices may not have libvulkan.so.1, only libvulkan.so. 
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); LOG_DEBUG(Render_Vulkan, "Trying Vulkan library (second attempt): {}", filename); - void(library.Open(filename.c_str())); + void(library->Open(filename.c_str())); } #endif return library; +#endif } } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h index 364ca979b..e1734525e 100644 --- a/src/video_core/vulkan_common/vulkan_library.h +++ b/src/video_core/vulkan_common/vulkan_library.h @@ -3,10 +3,14 @@ #pragma once +#include <memory> + #include "common/dynamic_library.h" +#include "core/frontend/graphics_context.h" namespace Vulkan { -Common::DynamicLibrary OpenLibrary(); +std::shared_ptr<Common::DynamicLibrary> OpenLibrary( + [[maybe_unused]] Core::Frontend::GraphicsContext* context = nullptr); } // namespace Vulkan |