diff options
38 files changed, 430 insertions, 269 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 30a3c939e..3d03bbf94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,7 @@ endif() # boost asio's concept usage doesn't play nicely with some compilers yet. add_definitions(-DBOOST_ASIO_DISABLE_CONCEPTS) if (MSVC) - add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/std:c++latest>) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/std:c++20>) # boost still makes use of deprecated result_of. add_definitions(-D_HAS_DEPRECATED_RESULT_OF) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 55b113297..0696201df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,7 +43,7 @@ if (MSVC) /Zo /permissive- /EHsc - /std:c++latest + /std:c++20 /utf-8 /volatile:iso /Zc:externConstexpr @@ -51,8 +51,10 @@ if (MSVC) /Zc:throwingNew /GT + # Modules + /experimental:module- # Disable module support explicitly due to conflicts with precompiled headers + # External headers diagnostics - /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers diff --git a/src/android/app/src/main/AndroidManifest.xml b/src/android/app/src/main/AndroidManifest.xml index 43087f2c0..eef566042 100644 --- a/src/android/app/src/main/AndroidManifest.xml +++ b/src/android/app/src/main/AndroidManifest.xml @@ -6,17 +6,10 @@ SPDX-License-Identifier: GPL-3.0-or-later --> <manifest xmlns:android="http://schemas.android.com/apk/res/android"> - <uses-feature - android:name="android.hardware.touchscreen" - android:required="false"/> - <uses-feature - android:name="android.hardware.gamepad" - android:required="false"/> - - <uses-feature - android:name="android.hardware.vulkan.version" - android:version="0x401000" - android:required="true" /> + <uses-feature android:name="android.hardware.touchscreen" android:required="false" /> + <uses-feature android:name="android.hardware.gamepad" android:required="false" /> + <uses-feature android:name="android.software.leanback" android:required="false" /> + <uses-feature android:name="android.hardware.vulkan.version" android:version="0x401000" android:required="true" /> <uses-permission android:name="android.permission.INTERNET" /> <uses-permission android:name="android.permission.FOREGROUND_SERVICE" /> @@ -31,7 +24,7 @@ SPDX-License-Identifier: GPL-3.0-or-later android:hasFragileUserData="true" android:supportsRtl="true" android:isGame="true" - android:banner="@drawable/ic_launcher" + android:banner="@drawable/tv_banner" android:extractNativeLibs="true" android:fullBackupContent="@xml/data_extraction_rules" android:dataExtractionRules="@xml/data_extraction_rules_api_31" @@ -44,9 +37,10 @@ SPDX-License-Identifier: GPL-3.0-or-later <!-- This intentfilter marks this Activity as the one that gets launched from Home screen. --> <intent-filter> - <action android:name="android.intent.action.MAIN"/> + <action android:name="android.intent.action.MAIN" /> - <category android:name="android.intent.category.LAUNCHER"/> + <category android:name="android.intent.category.LAUNCHER" /> + <category android:name="android.intent.category.LEANBACK_LAUNCHER" /> </intent-filter> </activity> diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index 94d5156cf..f4db61cb3 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -263,7 +263,8 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener { val config: Configuration = resources.configuration if ((config.screenLayout and Configuration.SCREENLAYOUT_LONG_YES) != 0 || - (config.screenLayout and Configuration.SCREENLAYOUT_LONG_NO) == 0) { + (config.screenLayout and Configuration.SCREENLAYOUT_LONG_NO) == 0 || + (config.screenLayout and Configuration.SCREENLAYOUT_SIZE_SMALL) != 0) { return rotation } when (rotation) { diff --git a/src/android/app/src/main/res/drawable-xhdpi/tv_banner.png b/src/android/app/src/main/res/drawable-xhdpi/tv_banner.png Binary files differnew file mode 100644 index 000000000..20c770591 --- /dev/null +++ b/src/android/app/src/main/res/drawable-xhdpi/tv_banner.png diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp index 07a679c32..703ef4494 100644 --- a/src/audio_core/audio_core.cpp +++ b/src/audio_core/audio_core.cpp @@ -47,12 +47,4 @@ AudioRenderer::ADSP::ADSP& AudioCore::GetADSP() { return *adsp; } -void AudioCore::SetNVDECActive(bool active) { - nvdec_active = active; -} - -bool AudioCore::IsNVDECActive() const { - return nvdec_active; -} - } // namespace AudioCore diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h index e33e00a3e..ea047773e 100644 --- a/src/audio_core/audio_core.h +++ b/src/audio_core/audio_core.h @@ -57,18 +57,6 @@ public: */ AudioRenderer::ADSP::ADSP& GetADSP(); - /** - * Toggle NVDEC state, used to avoid stall in playback. - * - * @param active - Set true if nvdec is active, otherwise false. - */ - void SetNVDECActive(bool active); - - /** - * Get NVDEC state. - */ - bool IsNVDECActive() const; - private: /** * Create the sinks on startup. @@ -83,8 +71,6 @@ private: std::unique_ptr<Sink::Sink> input_sink; /// The ADSP in the sysmodule std::unique_ptr<AudioRenderer::ADSP::ADSP> adsp; - /// Is NVDec currently active? - bool nvdec_active{false}; }; } // namespace AudioCore diff --git a/src/common/settings.cpp b/src/common/settings.cpp index ff53e80bb..9ff3edabb 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -235,6 +235,7 @@ void RestoreGlobalState(bool is_powered_on) { values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); values.enable_compute_pipelines.SetGlobal(true); + values.use_video_framerate.SetGlobal(true); // System values.language_index.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 7f865b2a7..9682281b0 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -482,6 +482,7 @@ struct Values { SwitchableSetting<AstcRecompression, true> astc_recompression{ AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, "astc_recompression"}; + SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; SwitchableSetting<u8> bg_red{0, "bg_red"}; SwitchableSetting<u8> bg_green{0, "bg_green"}; diff --git a/src/common/uuid.cpp b/src/common/uuid.cpp index 89e1ed225..035df7fe0 100644 --- a/src/common/uuid.cpp +++ b/src/common/uuid.cpp @@ -48,7 +48,7 @@ std::array<u8, 0x10> ConstructFromRawString(std::string_view raw_string) { } std::array<u8, 0x10> ConstructFromFormattedString(std::string_view formatted_string) { - std::array<u8, 0x10> uuid; + std::array<u8, 0x10> uuid{}; size_t i = 0; diff --git a/src/core/core.cpp b/src/core/core.cpp index 4406ae30e..7ba704f18 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -216,6 +216,14 @@ struct System::Impl { } } + void SetNVDECActive(bool is_nvdec_active) { + nvdec_active = is_nvdec_active; + } + + bool GetNVDECActive() { + return nvdec_active; + } + void InitializeDebugger(System& system, u16 port) { debugger = std::make_unique<Debugger>(system, port); } @@ -485,6 +493,8 @@ struct System::Impl { std::atomic_bool is_powered_on{}; bool exit_lock = false; + bool nvdec_active{}; + Reporter reporter; std::unique_ptr<Memory::CheatEngine> cheat_engine; std::unique_ptr<Tools::Freezer> memory_freezer; @@ -594,6 +604,14 @@ void System::UnstallApplication() { impl->UnstallApplication(); } +void System::SetNVDECActive(bool is_nvdec_active) { + impl->SetNVDECActive(is_nvdec_active); +} + +bool System::GetNVDECActive() { + return impl->GetNVDECActive(); +} + void System::InitializeDebugger() { impl->InitializeDebugger(*this, Settings::values.gdbstub_port.GetValue()); } diff --git a/src/core/core.h b/src/core/core.h index 4f153154f..ff2e4bd30 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -189,6 +189,9 @@ public: std::unique_lock<std::mutex> StallApplication(); void UnstallApplication(); + void SetNVDECActive(bool is_nvdec_active); + [[nodiscard]] bool GetNVDECActive(); + /** * Initialize the debugger. */ diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp index 50f44f598..cd9ac2e75 100644 --- a/src/core/file_sys/control_metadata.cpp +++ b/src/core/file_sys/control_metadata.cpp @@ -23,8 +23,8 @@ const std::array<const char*, 16> LANGUAGE_NAMES{{ "Portuguese", "Russian", "Korean", - "Taiwanese", - "Chinese", + "TraditionalChinese", + "SimplifiedChinese", "BrazilianPortuguese", }}; @@ -45,17 +45,17 @@ constexpr std::array<Language, 18> language_to_codes = {{ Language::German, Language::Italian, Language::Spanish, - Language::Chinese, + Language::SimplifiedChinese, Language::Korean, Language::Dutch, Language::Portuguese, Language::Russian, - Language::Taiwanese, + Language::TraditionalChinese, Language::BritishEnglish, Language::CanadianFrench, Language::LatinAmericanSpanish, - Language::Chinese, - Language::Taiwanese, + Language::SimplifiedChinese, + Language::TraditionalChinese, Language::BrazilianPortuguese, }}; diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h index 6a81873b1..c98efb00d 100644 --- a/src/core/file_sys/control_metadata.h +++ b/src/core/file_sys/control_metadata.h @@ -84,8 +84,8 @@ enum class Language : u8 { Portuguese = 10, Russian = 11, Korean = 12, - Taiwanese = 13, - Chinese = 14, + TraditionalChinese = 13, + SimplifiedChinese = 14, BrazilianPortuguese = 15, Default = 255, diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index f786f2add..4e61d4335 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -25,6 +25,8 @@ #include "core/file_sys/vfs_layered.h" #include "core/file_sys/vfs_vector.h" #include "core/hle/service/filesystem/filesystem.h" +#include "core/hle/service/ns/language.h" +#include "core/hle/service/set/set.h" #include "core/loader/loader.h" #include "core/loader/nso.h" #include "core/memory/cheat_engine.h" @@ -624,8 +626,37 @@ PatchManager::Metadata PatchManager::ParseControlNCA(const NCA& nca) const { auto nacp = nacp_file == nullptr ? nullptr : std::make_unique<NACP>(nacp_file); + // Get language code from settings + const auto language_code = + Service::Set::GetLanguageCodeFromIndex(Settings::values.language_index.GetValue()); + + // Convert to application language and get priority list + const auto application_language = + Service::NS::ConvertToApplicationLanguage(language_code) + .value_or(Service::NS::ApplicationLanguage::AmericanEnglish); + const auto language_priority_list = + Service::NS::GetApplicationLanguagePriorityList(application_language); + + // Convert to language names + auto priority_language_names = FileSys::LANGUAGE_NAMES; // Copy + if (language_priority_list) { + for (size_t i = 0; i < priority_language_names.size(); ++i) { + // Relies on FileSys::LANGUAGE_NAMES being in the same order as + // Service::NS::ApplicationLanguage + const auto language_index = static_cast<u8>(language_priority_list->at(i)); + + if (language_index < FileSys::LANGUAGE_NAMES.size()) { + priority_language_names[i] = FileSys::LANGUAGE_NAMES[language_index]; + } else { + // Not a catastrophe, unlikely to happen + LOG_WARNING(Loader, "Invalid language index {}", language_index); + } + } + } + + // Get first matching icon VirtualFile icon_file; - for (const auto& language : FileSys::LANGUAGE_NAMES) { + for (const auto& language : priority_language_names) { icon_file = extracted->GetFile(std::string("icon_").append(language).append(".dat")); if (icon_file != nullptr) { break; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 0c7aee1b8..dc45169ad 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -69,7 +69,7 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in void nvhost_nvdec::OnOpen(DeviceFD fd) { LOG_INFO(Service_NVDRV, "NVDEC video stream started"); - system.AudioCore().SetNVDECActive(true); + system.SetNVDECActive(true); } void nvhost_nvdec::OnClose(DeviceFD fd) { @@ -79,7 +79,7 @@ void nvhost_nvdec::OnClose(DeviceFD fd) { if (iter != host1x_file.fd_to_id.end()) { system.GPU().ClearCdmaInstance(iter->second); } - system.AudioCore().SetNVDECActive(false); + system.SetNVDECActive(false); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 4988e6e17..da2d5890f 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -324,6 +324,10 @@ s64 Nvnflinger::GetNextTicks() const { speed_scale = 0.01f; } } + if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) { + // Run at intended presentation rate during video playback. + speed_scale = 1.f; + } // As an extension, treat nonpositive swap interval as framerate multiplier. const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 94e3000ba..bf6439530 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -133,8 +133,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp renderer_opengl/gl_state_tracker.h - renderer_opengl/gl_stream_buffer.cpp - renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_staging_buffer_pool.cpp + renderer_opengl/gl_staging_buffer_pool.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h renderer_opengl/gl_texture_cache_base.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f1ad5f7cb..2f281b370 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { if (committed_ranges.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); } return; @@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { committed_ranges.clear(); if (downloads.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); } return; @@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() { const u32 size = channel_state->index_buffer.size; const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(size); std::array<BufferCopy, 1> copies{ {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; @@ -1462,7 +1460,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, template <class P> void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span<BufferCopy> copies) { - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { MappedUploadMemory(buffer, total_size_bytes, copies); } else { ImmediateUploadMemory(buffer, largest_copy, copies); @@ -1473,7 +1471,7 @@ template <class P> void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, [[maybe_unused]] u64 largest_copy, [[maybe_unused]] std::span<const BufferCopy> copies) { - if constexpr (!USE_MEMORY_MAPS) { + if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) { std::span<u8> immediate_buffer; for (const BufferCopy& copy : copies) { std::span<const u8> upload_span; @@ -1532,7 +1530,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, auto& buffer = slot_buffers[buffer_id]; SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); - if constexpr (USE_MEMORY_MAPS) { + if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { auto upload_staging = runtime.UploadStagingBuffer(copy_size); std::array copies{BufferCopy{ .src_offset = upload_staging.offset, diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c689fe06b..60a1f285e 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6d3bda192..c419714d4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { return views.back().texture.handle; } -BufferCacheRuntime::BufferCacheRuntime(const Device& device_) - : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, +BufferCacheRuntime::BufferCacheRuntime(const Device& device_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, staging_buffer_pool{staging_buffer_pool_}, + has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) }(); } +StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); +} + +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); +} + u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { if (device.CanReportMemoryUsage()) { return device_access_memory - device.GetCurrentDedicatedVideoMemory(); @@ -147,15 +157,49 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { return 2_GiB; } -void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies) { +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + if (barrier) { + PreCopyBarrier(); + } for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData( - src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), - static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); + glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset), + static_cast<GLintptr>(copy.dst_offset), + static_cast<GLsizeiptr>(copy.size)); + } + if (barrier) { + PostCopyBarrier(); } } +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies) { + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); +} + +void BufferCacheRuntime::PreCopyBarrier() { + // TODO: finer grained barrier? + glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +void BufferCacheRuntime::PostCopyBarrier() { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); +} + +void BufferCacheRuntime::Finish() { + glFinish(); +} + void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 18d3c3ac0..a24991585 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -12,7 +12,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" namespace OpenGL { @@ -60,11 +60,28 @@ class BufferCacheRuntime { public: static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); - explicit BufferCacheRuntime(const Device& device_); + explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); + + [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); + + [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + + void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, std::span<const VideoCommon::BufferCopy> copies); + void PreCopyBarrier(); + void PostCopyBarrier(); + void Finish(); + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); @@ -169,6 +186,7 @@ private: }; const Device& device; + StagingBufferPool& staging_buffer_pool; bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; @@ -201,7 +219,7 @@ private: struct BufferCacheParams { using Runtime = OpenGL::BufferCacheRuntime; using Buffer = OpenGL::Buffer; - using Async_Buffer = u32; + using Async_Buffer = OpenGL::StagingBufferMap; using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; static constexpr bool IS_OPENGL = true; @@ -209,9 +227,12 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + + // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f5baa0f3c..fc711c44a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra StateTracker& state_tracker_) : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache_runtime(device, program_manager, state_tracker), - texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), + texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), + texture_cache(texture_cache_runtime, *this), + buffer_cache_runtime(device, staging_buffer_pool), buffer_cache(*this, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 410d8ffc5..a73ad15c1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -230,6 +230,7 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + StagingBufferPool staging_buffer_pool; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100644 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <array> +#include <memory> +#include <span> + +#include <glad/glad.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" + +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +StagingBufferMap::~StagingBufferMap() { + if (sync) { + sync->Create(); + } +} + +StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +StagingBuffers::~StagingBuffers() = default; + +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { + MICROPROFILE_SCOPE(OpenGL_BufferRequest); + + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + sync_indices[index] = insert_fence ? ++current_sync_index : 0; + return StagingBufferMap{ + .mapped_span = std::span(maps[index], requested_size), + .sync = sync, + .buffer = buffers[index].handle, + }; +} + +size_t StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional<size_t> index = FindBuffer(requested_size); index) { + return *index; + } + + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + const auto next_pow2_size = Common::NextPow2(requested_size); + glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + syncs.emplace_back(); + sync_indices.emplace_back(); + sizes.push_back(next_pow2_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { + size_t known_unsignaled_index = current_sync_index + 1; + size_t smallest_buffer = std::numeric_limits<size_t>::max(); + std::optional<size_t> found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + if (sync_indices[index] >= known_unsignaled_index) { + // This fence is later than a fence that is known to not be signaled + continue; + } + if (!syncs[index].IsSignaled()) { + // Since this fence hasn't been signaled, it's safe to assume all later + // fences haven't been signaled either + known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; + } + return found; +} + +StreamBuffer::StreamBuffer() { + static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + buffer.Create(); + glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); + glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); + mapped_pointer = + static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); + for (OGLSync& sync : fences) { + sync.Create(); + } +} + +std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { + ASSERT(size < REGION_SIZE); + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + fences[region].Create(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + if (iterator + size >= free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + fences[region].Create(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return {std::span(mapped_pointer + offset, size), offset}; +} + +StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} + +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 8fe927aaf..60f72d3a0 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -4,8 +4,10 @@ #pragma once #include <array> +#include <optional> #include <span> #include <utility> +#include <vector> #include <glad/glad.h> @@ -17,6 +19,35 @@ namespace OpenGL { using namespace Common::Literals; +struct StagingBufferMap { + ~StagingBufferMap(); + + std::span<u8> mapped_span; + size_t offset = 0; + OGLSync* sync; + GLuint buffer; +}; + +struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); + + StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + + size_t RequestBuffer(size_t requested_size); + + std::optional<size_t> FindBuffer(size_t requested_size); + + std::vector<OGLSync> syncs; + std::vector<OGLBuffer> buffers; + std::vector<u8*> maps; + std::vector<size_t> sizes; + std::vector<size_t> sync_indices; + GLenum storage_flags; + GLenum map_flags; + size_t current_sync_index = 0; +}; + class StreamBuffer { static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; static constexpr size_t NUM_SYNCS = 16; @@ -48,4 +79,17 @@ private: std::array<OGLSync, NUM_SYNCS> fences; }; +class StagingBufferPool { +public: + StagingBufferPool() = default; + ~StagingBufferPool() = default; + + StagingBufferMap RequestUploadBuffer(size_t size); + StagingBufferMap RequestDownloadBuffer(size_t size); + +private: + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp deleted file mode 100644 index 2005c8993..000000000 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include <array> -#include <memory> -#include <span> - -#include <glad/glad.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" - -namespace OpenGL { - -StreamBuffer::StreamBuffer() { - static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; - buffer.Create(); - glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); - glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); - mapped_pointer = - static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); - for (OGLSync& sync : fences) { - sync.Create(); - } -} - -std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { - ASSERT(size < REGION_SIZE); - for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; - ++region) { - fences[region].Create(); - } - used_iterator = iterator; - - for (size_t region = Region(free_iterator) + 1, - region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); - region < region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - if (iterator + size >= free_iterator) { - free_iterator = iterator + size; - } - if (iterator + size > STREAM_BUFFER_SIZE) { - for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { - fences[region].Create(); - } - used_iterator = 0; - iterator = 0; - free_iterator = size; - - for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - } - const size_t offset = iterator; - iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); - return {std::span(mapped_pointer + offset, size), offset}; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 56d0ff869..1c5dbcdd8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; } } - } // Anonymous namespace -ImageBufferMap::~ImageBufferMap() { - if (sync) { - sync->Create(); - } -} - TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, - StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), - format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { + StateTracker& state_tracker_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, + util_shaders(program_manager), format_conversion_pass{util_shaders}, + resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() { glFinish(); } -ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { - return upload_buffers.RequestMap(size, true); +StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); } -ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return download_buffers.RequestMap(size, false); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); } u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { @@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, is_linear ? GL_LINEAR : GL_NEAREST); } -void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { switch (image.info.type) { case ImageType::e2D: @@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept { return device.HasASTC(); } -TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) - : storage_flags{storage_flags_}, map_flags{map_flags_} {} - -TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - -ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, - bool insert_fence) { - const size_t index = RequestBuffer(requested_size); - OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - return ImageBufferMap{ - .mapped_span = std::span(maps[index], requested_size), - .sync = sync, - .buffer = buffers[index].handle, - }; -} - -size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { - if (const std::optional<size_t> index = FindBuffer(requested_size); index) { - return *index; - } - - OGLBuffer& buffer = buffers.emplace_back(); - buffer.Create(); - glNamedBufferStorage(buffer.handle, requested_size, nullptr, - storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, - map_flags | GL_MAP_PERSISTENT_BIT))); - - syncs.emplace_back(); - sizes.push_back(requested_size); - - ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && - maps.size() == sizes.size()); - - return buffers.size() - 1; -} - -std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { - size_t smallest_buffer = std::numeric_limits<size_t>::max(); - std::optional<size_t> found; - const size_t num_buffers = sizes.size(); - for (size_t index = 0; index < num_buffers; ++index) { - const size_t buffer_size = sizes[index]; - if (buffer_size < requested_size || buffer_size >= smallest_buffer) { - continue; - } - if (syncs[index].handle != 0) { - if (!syncs[index].IsSignaled()) { - continue; - } - syncs[index].Release(); - } - smallest_buffer = buffer_size; - found = index; - } - return found; -} - Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { @@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, } } -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { UploadMemory(map.buffer, map.offset, copies); } @@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b } } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::DownloadMemory(StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { DownloadMemory(map.buffer, map.offset, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e9b3302b..1148b73d7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -11,6 +11,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -37,15 +38,6 @@ using VideoCommon::Region2D; using VideoCommon::RenderTargets; using VideoCommon::SlotVector; -struct ImageBufferMap { - ~ImageBufferMap(); - - std::span<u8> mapped_span; - size_t offset = 0; - OGLSync* sync; - GLuint buffer; -}; - struct FormatProperties { GLenum compatibility_class; bool compatibility_by_size; @@ -74,14 +66,15 @@ class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, - StateTracker& state_tracker); + StateTracker& state_tracker, + StagingBufferPool& staging_buffer_pool); ~TextureCacheRuntime(); void Finish(); - ImageBufferMap UploadStagingBuffer(size_t size); + StagingBufferMap UploadStagingBuffer(size_t size); - ImageBufferMap DownloadStagingBuffer(size_t size); + StagingBufferMap DownloadStagingBuffer(size_t size); u64 GetDeviceLocalMemory() const { return device_access_memory; @@ -120,7 +113,7 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, + void AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void InsertUploadMemoryBarrier(); @@ -149,35 +142,16 @@ public: } private: - struct StagingBuffers { - explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); - ~StagingBuffers(); - - ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - - size_t RequestBuffer(size_t requested_size); - - std::optional<size_t> FindBuffer(size_t requested_size); - - std::vector<OGLSync> syncs; - std::vector<OGLBuffer> buffers; - std::vector<u8*> maps; - std::vector<size_t> sizes; - GLenum storage_flags; - GLenum map_flags; - }; - const Device& device; StateTracker& state_tracker; + StagingBufferPool& staging_buffer_pool; + UtilShaders util_shaders; FormatConversionPass format_conversion_pass; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; bool has_broken_texture_view_formats = false; - StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; - StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; - OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; @@ -213,7 +187,7 @@ public: void UploadMemory(GLuint buffer_handle, size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, + void UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, @@ -222,7 +196,8 @@ public: void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(StagingBufferMap& map, + std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2c7ac210b..544982d18 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -19,6 +19,7 @@ #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, +void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, +void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 9013808e7..feecd404c 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -16,23 +16,23 @@ namespace OpenGL { class Image; class ProgramManager; -struct ImageBufferMap; +struct StagingBufferMap; class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void ASTCDecode(Image& image, const ImageBufferMap& map, + void ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, + void PitchUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 794dd0758..92b4f7859 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -157,6 +157,7 @@ struct BufferCacheParams { static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cf082c5d..c7f7448e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -850,15 +850,11 @@ void TextureCache<P>::PopAsyncFlushes() { template <class P> ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); - const ImageId dst_id = FindDMAImage(dst_info, operand.address); - if (!dst_id) { - return NULL_IMAGE_ID; - } - auto& image = slot_images[dst_id]; - if (False(image.flags & ImageFlagBits::GpuModified)) { - // No need to waste time on an image that's synced with guest + const ImageId image_id = FindDMAImage(dst_info, operand.address); + if (!image_id) { return NULL_IMAGE_ID; } + auto& image = slot_images[image_id]; if (!is_upload && !image.info.dma_downloaded) { // Force a full sync. image.info.dma_downloaded = true; @@ -868,7 +864,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo if (!base) { return NULL_IMAGE_ID; } - return dst_id; + return image_id; } template <class P> diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b692b4be4..d62a103a1 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -85,7 +85,6 @@ // Define extensions which must be supported. #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ - EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ @@ -105,6 +104,7 @@ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ @@ -141,9 +141,6 @@ FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \ FEATURE_NAME(features, wideLines) \ FEATURE_NAME(host_query_reset, hostQueryReset) \ - FEATURE_NAME(robustness2, nullDescriptor) \ - FEATURE_NAME(robustness2, robustBufferAccess2) \ - FEATURE_NAME(robustness2, robustImageAccess2) \ FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \ FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \ FEATURE_NAME(variable_pointer, variablePointers) \ @@ -156,6 +153,9 @@ FEATURE_NAME(index_type_uint8, indexTypeUint8) \ FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \ FEATURE_NAME(provoking_vertex, provokingVertexLast) \ + FEATURE_NAME(robustness2, nullDescriptor) \ + FEATURE_NAME(robustness2, robustBufferAccess2) \ + FEATURE_NAME(robustness2, robustImageAccess2) \ FEATURE_NAME(shader_float16_int8, shaderFloat16) \ FEATURE_NAME(shader_float16_int8, shaderInt8) \ FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 896863f87..0463ac8b9 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -42,6 +42,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); ui->enable_compute_pipelines_checkbox->setChecked( Settings::values.enable_compute_pipelines.GetValue()); + ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setCurrentIndex( @@ -91,6 +92,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.enable_compute_pipelines, ui->enable_compute_pipelines_checkbox, enable_compute_pipelines); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate, + ui->use_video_framerate_checkbox, use_video_framerate); } void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -125,6 +128,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { Settings::values.max_anisotropy.UsingGlobal()); ui->enable_compute_pipelines_checkbox->setEnabled( Settings::values.enable_compute_pipelines.UsingGlobal()); + ui->use_video_framerate_checkbox->setEnabled( + Settings::values.use_video_framerate.UsingGlobal()); return; } @@ -149,6 +154,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->enable_compute_pipelines_checkbox, Settings::values.enable_compute_pipelines, enable_compute_pipelines); + ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox, + Settings::values.use_video_framerate, + use_video_framerate); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 1c7b636b9..a4dc8ceb0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -47,6 +47,7 @@ private: ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; ConfigurationShared::CheckState enable_compute_pipelines; + ConfigurationShared::CheckState use_video_framerate; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 37757a918..e7f0ef6be 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -192,6 +192,16 @@ Compute pipelines are always enabled on all other drivers.</string> </widget> </item> <item> + <widget class="QCheckBox" name="use_video_framerate_checkbox"> + <property name="toolTip"> + <string>Run the game at normal speed during video playback, even when the framerate is unlocked.</string> + </property> + <property name="text"> + <string>Sync to framerate of video playback</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="af_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 82bce9a3a..145fea5f1 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -3491,6 +3491,7 @@ void GMainWindow::ResetWindowSize1080() { void GMainWindow::OnConfigure() { const auto old_theme = UISettings::values.theme; const bool old_discord_presence = UISettings::values.enable_discord_presence.GetValue(); + const auto old_language_index = Settings::values.language_index.GetValue(); Settings::SetConfiguringGlobal(true); ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), *system, @@ -3559,7 +3560,7 @@ void GMainWindow::OnConfigure() { emit UpdateThemedIcons(); const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); - if (reload) { + if (reload || Settings::values.language_index.GetValue() != old_language_index) { game_list->PopulateAsync(UISettings::values.game_dirs); } |