summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt90
-rw-r--r--src/common/CMakeLists.txt3
-rw-r--r--src/common/common_funcs.h29
-rw-r--r--src/common/error.cpp (renamed from src/common/misc.cpp)6
-rw-r--r--src/common/error.h21
-rw-r--r--src/common/settings.cpp4
-rw-r--r--src/common/settings.h8
-rw-r--r--src/common/thread.cpp6
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/cpu_manager.cpp25
-rw-r--r--src/core/cpu_manager.h6
-rw-r--r--src/core/file_sys/kernel_executable.h1
-rw-r--r--src/core/hle/api_version.h17
-rw-r--r--src/core/hle/kernel/kernel.h1
-rw-r--r--src/core/hle/service/acc/acc.cpp66
-rw-r--r--src/core/hle/service/acc/async_context.cpp68
-rw-r--r--src/core/hle/service/acc/async_context.h37
-rw-r--r--src/core/hle/service/am/am.cpp16
-rw-r--r--src/core/hle/service/am/am.h2
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp26
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.h14
-rw-r--r--src/core/hle/service/hid/hid.cpp14
-rw-r--r--src/core/hle/service/hid/hid.h1
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp9
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp25
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h15
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp17
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h3
-rw-r--r--src/core/hle/service/vi/display/vi_display.cpp17
-rw-r--r--src/core/hle/service/vi/display/vi_display.h13
-rw-r--r--src/core/network/network.cpp5
-rw-r--r--src/core/telemetry_session.cpp16
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp122
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp56
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp47
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp41
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp224
-rw-r--r--src/video_core/command_classes/codecs/codec.h10
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp3
-rw-r--r--src/video_core/engines/maxwell_3d.h8
-rw-r--r--src/video_core/memory_manager.cpp2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp25
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp40
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp30
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h14
-rw-r--r--src/video_core/shader_environment.cpp1
-rw-r--r--src/video_core/texture_cache/slot_vector.h1
-rw-r--r--src/video_core/video_core.cpp3
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp16
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h11
-rw-r--r--src/yuzu/configuration/config.cpp7
-rw-r--r--src/yuzu/configuration/config.h1
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp42
-rw-r--r--src/yuzu/configuration/configure_graphics.h1
-rw-r--r--src/yuzu/configuration/configure_graphics.ui53
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui4
-rw-r--r--src/yuzu/game_list.cpp8
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h6
69 files changed, 1049 insertions, 374 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e064ab44..5df2ff3fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS
avutil
swscale)
+if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+ Include(FindPkgConfig REQUIRED)
+ pkg_check_modules(LIBVA libva)
+endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG)
# Use system installed FFmpeg
find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@@ -540,6 +544,9 @@ endif()
if (YUZU_USE_BUNDLED_FFMPEG)
if (NOT WIN32)
+ # TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
+ # externals/ffmpeg/ffmpeg)
+
# Build FFmpeg from externals
message(STATUS "Using FFmpeg from externals")
@@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG)
CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach()
- set(FFmpeg_INCLUDE_DIR
- "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
- CACHE PATH "Path to FFmpeg headers" FORCE)
+ Include(FindPkgConfig REQUIRED)
+ pkg_check_modules(LIBVA libva)
+ pkg_check_modules(CUDA cuda)
+ pkg_check_modules(FFNVCODEC ffnvcodec)
+ pkg_check_modules(VDPAU vdpau)
+
+ set(FFmpeg_HWACCEL_LIBRARIES)
+ set(FFmpeg_HWACCEL_FLAGS)
+ set(FFmpeg_HWACCEL_INCLUDE_DIRS)
+ set(FFmpeg_HWACCEL_LDFLAGS)
- if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
- Include(FindPkgConfig REQUIRED)
- pkg_check_modules(LIBVA libva)
- endif()
if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
- set(FFmpeg_LIBVA_LIBRARIES
+ list(APPEND FFmpeg_HWACCEL_LIBRARIES
${LIBDRM_LIBRARIES}
${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES}
@@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
+ list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
+ ${LIBDRM_INCLUDE_DIRS}
+ ${X11_INCLUDE_DIRS}
+ ${LIBVA-DRM_INCLUDE_DIRS}
+ ${LIBVA-X11_INCLUDE_DIRS}
+ ${LIBVA_INCLUDE_DIRS}
+ )
message(STATUS "VA-API found")
else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif()
+ if (FFNVCODEC_FOUND AND CUDA_FOUND)
+ list(APPEND FFmpeg_HWACCEL_FLAGS
+ --enable-cuvid
+ --enable-ffnvcodec
+ --enable-nvdec
+ --enable-hwaccel=h264_nvdec
+ --enable-hwaccel=vp9_nvdec
+ --extra-cflags=-I${CUDA_INCLUDE_DIRS}
+ )
+ list(APPEND FFmpeg_HWACCEL_LIBRARIES
+ ${FFNVCODEC_LIBRARIES}
+ ${CUDA_LIBRARIES}
+ )
+ list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
+ ${FFNVCODEC_INCLUDE_DIRS}
+ ${CUDA_INCLUDE_DIRS}
+ )
+ list(APPEND FFmpeg_HWACCEL_LDFLAGS
+ ${FFNVCODEC_LDFLAGS}
+ ${CUDA_LDFLAGS}
+ )
+ message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
+ endif()
+
+ if (VDPAU_FOUND)
+ list(APPEND FFmpeg_HWACCEL_FLAGS
+ --enable-vdpau
+ --enable-hwaccel=h264_vdpau
+ --enable-hwaccel=vp9_vdpau
+ )
+ list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
+ list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
+ list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
+ message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
+ else()
+ list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
+ endif()
+
 # `configure` parameters build only exactly what yuzu needs from FFmpeg
 # `--disable-vdpau` (appended to FFmpeg_HWACCEL_FLAGS above when VDPAU is not found)
 # is needed to avoid linking issues
add_custom_command(
@@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network
--disable-postproc
--disable-swresample
- --disable-vdpau
--enable-decoder=h264
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
@@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${FFmpeg_BUILD_DIR}
)
+ set(FFmpeg_INCLUDE_DIR
+ "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
+ CACHE PATH "Path to FFmpeg headers" FORCE)
+
+ set(FFmpeg_LDFLAGS
+ "${FFmpeg_HWACCEL_LDFLAGS}"
+ CACHE STRING "FFmpeg linker flags" FORCE)
+
# ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES})
- set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
+ set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES)
- unset(FFmpeg_LIBVA_LIBRARIES)
+ unset(FFmpeg_HWACCEL_FLAGS)
+ unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
+ unset(FFmpeg_HWACCEL_LDFLAGS)
+ unset(FFmpeg_HWACCEL_LIBRARIES)
if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@@ -670,12 +735,13 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif()
else() # WIN32
# Use yuzu FFmpeg binaries
- set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
+ set(FFmpeg_EXT_NAME "ffmpeg-4.4")
set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
set(FFmpeg_FOUND YES)
set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
+ set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
set(FFmpeg_LIBRARIES
${FFmpeg_LIBRARY_DIR}/swscale.lib
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 57922b51c..b18a2a2f5 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -53,6 +53,8 @@ add_library(common STATIC
div_ceil.h
dynamic_library.cpp
dynamic_library.h
+ error.cpp
+ error.h
fiber.cpp
fiber.h
fs/file.cpp
@@ -88,7 +90,6 @@ add_library(common STATIC
microprofile.cpp
microprofile.h
microprofileui.h
- misc.cpp
nvidia_flags.cpp
nvidia_flags.h
page_table.cpp
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 53bd7da60..4c1e29de6 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,9 +4,8 @@
#pragma once
-#include <algorithm>
#include <array>
-#include <string>
+#include <iterator>
#if !defined(ARCHITECTURE_x86_64)
#include <cstdlib> // for exit
@@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
#endif // _MSC_VER ndef
-// Generic function to get last error message.
-// Call directly after the command or use the error num.
-// This function might change the error code.
-// Defined in misc.cpp.
-[[nodiscard]] std::string GetLastErrorMsg();
-
-// Like GetLastErrorMsg(), but passing an explicit error code.
-// Defined in misc.cpp.
-[[nodiscard]] std::string NativeErrorToString(int e);
-
#define DECLARE_ENUM_FLAG_OPERATORS(type) \
[[nodiscard]] constexpr type operator|(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
@@ -72,6 +61,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b)); \
} \
+ [[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \
+ using T = std::underlying_type_t<type>; \
+ return static_cast<type>(static_cast<T>(a) << static_cast<T>(b)); \
+ } \
+ [[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \
+ using T = std::underlying_type_t<type>; \
+ return static_cast<type>(static_cast<T>(a) >> static_cast<T>(b)); \
+ } \
constexpr type& operator|=(type& a, type b) noexcept { \
a = a | b; \
return a; \
@@ -84,6 +81,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
a = a ^ b; \
return a; \
} \
+ constexpr type& operator<<=(type& a, type b) noexcept { \
+ a = a << b; \
+ return a; \
+ } \
+ constexpr type& operator>>=(type& a, type b) noexcept { \
+ a = a >> b; \
+ return a; \
+ } \
[[nodiscard]] constexpr type operator~(type key) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(~static_cast<T>(key)); \
diff --git a/src/common/misc.cpp b/src/common/error.cpp
index 495385b9e..d4455e310 100644
--- a/src/common/misc.cpp
+++ b/src/common/error.cpp
@@ -10,7 +10,9 @@
#include <cstring>
#endif
-#include "common/common_funcs.h"
+#include "common/error.h"
+
+namespace Common {
std::string NativeErrorToString(int e) {
#ifdef _WIN32
@@ -50,3 +52,5 @@ std::string GetLastErrorMsg() {
return NativeErrorToString(errno);
#endif
}
+
+} // namespace Common
diff --git a/src/common/error.h b/src/common/error.h
new file mode 100644
index 000000000..e084d4b0f
--- /dev/null
+++ b/src/common/error.h
@@ -0,0 +1,21 @@
+// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+namespace Common {
+
+// Generic function to get the message for the last error.
+// Call it directly after the failing call, or save the error number and pass it to
+// NativeErrorToString() instead. Note that this function might change the error code.
+// Defined in error.cpp.
+[[nodiscard]] std::string GetLastErrorMsg();
+
+// Like GetLastErrorMsg(), but passing an explicit error code.
+// Defined in error.cpp.
+[[nodiscard]] std::string NativeErrorToString(int e);
+
+} // namespace Common
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index fd3b639cd..0d2df80a8 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -54,7 +54,7 @@ void LogSettings() {
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
values.use_asynchronous_gpu_emulation.GetValue());
- log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
+ log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);
- values.use_nvdec_emulation.SetGlobal(true);
+ values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index ec4d381e8..b7195670b 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -48,6 +48,12 @@ enum class FullscreenMode : u32 {
Exclusive = 1,
};
+enum class NvdecEmulation : u32 {
+ Off = 0,
+ CPU = 1,
+ GPU = 2,
+};
+
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -466,7 +472,7 @@ struct Values {
RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
GPUAccuracy::Extreme, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
- Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
+ Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index d2c1ac60d..946a1114d 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "common/common_funcs.h"
+#include <string>
+
+#include "common/error.h"
#include "common/logging/log.h"
#include "common/thread.h"
#ifdef __APPLE__
@@ -21,8 +23,6 @@
#include <unistd.h>
#endif
-#include <string>
-
#ifdef __FreeBSD__
#define cpu_set_t cpuset_t
#endif
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 87d47e2e5..7140d0db8 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -263,6 +263,8 @@ add_library(core STATIC
hle/service/acc/acc_u0.h
hle/service/acc/acc_u1.cpp
hle/service/acc/acc_u1.h
+ hle/service/acc/async_context.cpp
+ hle/service/acc/async_context.h
hle/service/acc/errors.h
hle/service/acc/profile_manager.cpp
hle/service/acc/profile_manager.h
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 7e195346b..77efcabf0 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -21,34 +21,25 @@ namespace Core {
CpuManager::CpuManager(System& system_) : system{system_} {}
CpuManager::~CpuManager() = default;
-void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
- cpu_manager.RunThread(core);
+void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager,
+ std::size_t core) {
+ cpu_manager.RunThread(stop_token, core);
}
void CpuManager::Initialize() {
running_mode = true;
if (is_multicore) {
for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- core_data[core].host_thread =
- std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
+ core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
}
} else {
- core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
+ core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0);
}
}
void CpuManager::Shutdown() {
running_mode = false;
Pause(false);
- if (is_multicore) {
- for (auto& data : core_data) {
- data.host_thread->join();
- data.host_thread.reset();
- }
- } else {
- core_data[0].host_thread->join();
- core_data[0].host_thread.reset();
- }
}
std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
@@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) {
}
}
-void CpuManager::RunThread(std::size_t core) {
+void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
/// Initialization
system.RegisterCoreThread(core);
std::string name;
@@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) {
return;
}
+ if (stop_token.stop_requested()) {
+ break;
+ }
+
auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
data.is_running = true;
Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 140263b09..9d92d4af0 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -78,9 +78,9 @@ private:
void SingleCoreRunSuspendThread();
void SingleCorePause(bool paused);
- static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
+ static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);
- void RunThread(std::size_t core);
+ void RunThread(std::stop_token stop_token, std::size_t core);
struct CoreData {
std::shared_ptr<Common::Fiber> host_context;
@@ -89,7 +89,7 @@ private:
std::atomic<bool> is_running;
std::atomic<bool> is_paused;
std::atomic<bool> initialized;
- std::unique_ptr<std::thread> host_thread;
+ std::jthread host_thread;
};
std::atomic<bool> running_mode{};
diff --git a/src/core/file_sys/kernel_executable.h b/src/core/file_sys/kernel_executable.h
index 044c554d3..79ca82f8b 100644
--- a/src/core/file_sys/kernel_executable.h
+++ b/src/core/file_sys/kernel_executable.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <string>
#include <vector>
#include "common/common_funcs.h"
diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h
index 43d5670a9..626e30753 100644
--- a/src/core/hle/api_version.h
+++ b/src/core/hle/api_version.h
@@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0";
// Atmosphere version constants.
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0;
+
+/// Packs a firmware version into one 32-bit value: major in bits 31-24, minor in bits 23-16,
+/// micro in bits 15-8 and the revision in bits 7-0.
+constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) {
+    const u32 major_bits = u32{major} << 24;
+    const u32 minor_bits = u32{minor} << 16;
+    const u32 micro_bits = u32{micro} << 8;
+    const u32 rev_bits = u32{rev};
+    return major_bits | minor_bits | micro_bits | rev_bits;
+}
+
+/// Packs a firmware version the same way as AtmosphereTargetFirmwareWithRevision(), with the
+/// revision byte fixed at zero.
+constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) {
+    constexpr u8 no_revision = 0;
+    return AtmosphereTargetFirmwareWithRevision(major, minor, micro, no_revision);
+}
constexpr u32 GetTargetFirmware() {
- return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |
- u32{HOS_VERSION_MICRO} << 8 | 0U;
+ return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO);
}
} // namespace HLE::ApiVersion
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 3a6db0b1c..901d43da9 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 882fc1492..6d9ec0a8a 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -23,6 +23,7 @@
#include "core/hle/service/acc/acc_su.h"
#include "core/hle/service/acc/acc_u0.h"
#include "core/hle/service/acc/acc_u1.h"
+#include "core/hle/service/acc/async_context.h"
#include "core/hle/service/acc/errors.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/glue/arp.h"
@@ -454,22 +455,6 @@ public:
: IProfileCommon{system_, "IProfileEditor", true, user_id_, profile_manager_} {}
};
-class IAsyncContext final : public ServiceFramework<IAsyncContext> {
-public:
- explicit IAsyncContext(Core::System& system_) : ServiceFramework{system_, "IAsyncContext"} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, nullptr, "GetSystemEvent"},
- {1, nullptr, "Cancel"},
- {2, nullptr, "HasDone"},
- {3, nullptr, "GetResult"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
- }
-};
-
class ISessionObject final : public ServiceFramework<ISessionObject> {
public:
explicit ISessionObject(Core::System& system_, Common::UUID)
@@ -504,16 +489,44 @@ public:
}
};
+/// Stubbed async context returned by EnsureIdTokenCacheAsync. It marks itself complete during
+/// construction and always reports success.
+class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext {
+public:
+    explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} {
+        // Nothing is actually performed asynchronously, so completion is flagged right away.
+        MarkComplete();
+    }
+    ~EnsureTokenIdCacheAsyncInterface() = default;
+
+    /// Stub: answers the request with a bare success result.
+    void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        IPC::ResponseBuilder response{ctx, 2};
+        response.Push(ResultSuccess);
+    }
+
+protected:
+    /// The stubbed operation never fails.
+    ResultCode GetResult() const override {
+        return ResultSuccess;
+    }
+
+    /// There is no pending work to cancel.
+    void Cancel() override {}
+
+    /// The stubbed operation is always finished.
+    bool IsComplete() const override {
+        return true;
+    }
+};
+
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_)
- : ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_} {
+ : ServiceFramework{system_, "IManagerForApplication"},
+ ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)},
+ user_id{user_id_} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
- {2, nullptr, "EnsureIdTokenCacheAsync"},
- {3, nullptr, "LoadIdTokenCache"},
+ {2, &IManagerForApplication::EnsureIdTokenCacheAsync, "EnsureIdTokenCacheAsync"},
+ {3, &IManagerForApplication::LoadIdTokenCache, "LoadIdTokenCache"},
{130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
@@ -540,6 +553,20 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
+    /// Stub: replies with success and pushes the shared ensure_token_id async context as the
+    /// returned IPC interface.
+    void EnsureIdTokenCacheAsync(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        IPC::ResponseBuilder response{ctx, 2, 0, 1};
+        response.Push(ResultSuccess);
+        response.PushIpcInterface(ensure_token_id);
+    }
+
+    /// Stub: forwards the request to the async context object, which writes the response.
+    void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        auto& async_context = *ensure_token_id;
+        async_context.LoadIdTokenCache(ctx);
+    }
+
void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -562,6 +589,7 @@ private:
rb.Push(ResultSuccess);
}
+ std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{};
Common::UUID user_id{Common::INVALID_UUID};
};
diff --git a/src/core/hle/service/acc/async_context.cpp b/src/core/hle/service/acc/async_context.cpp
new file mode 100644
index 000000000..459323132
--- /dev/null
+++ b/src/core/hle/service/acc/async_context.cpp
@@ -0,0 +1,68 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/service/acc/async_context.h"
+
+namespace Service::Account {
+/// Creates and initializes the completion event, then registers the four IPC command handlers.
+IAsyncContext::IAsyncContext(Core::System& system_)
+    : ServiceFramework{system_, "IAsyncContext"}, compeletion_event{system_.Kernel()} {
+    // Set up the kernel event that MarkComplete() signals.
+    auto* const event = std::addressof(compeletion_event);
+    Kernel::KAutoObject::Create(event);
+    event->Initialize("IAsyncContext:CompletionEvent");
+
+    // clang-format off
+    static const FunctionInfo handlers[] = {
+        {0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
+        {1, &IAsyncContext::Cancel, "Cancel"},
+        {2, &IAsyncContext::HasDone, "HasDone"},
+        {3, &IAsyncContext::GetResult, "GetResult"},
+    };
+    // clang-format on
+
+    RegisterHandlers(handlers);
+}
+
+/// IPC command 0: replies with success and the readable side of the completion event.
+void IAsyncContext::GetSystemEvent(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    IPC::ResponseBuilder response{ctx, 2, 1};
+    response.Push(ResultSuccess);
+    response.PushCopyObjects(compeletion_event.GetReadableEvent());
+}
+
+/// IPC command 1: runs the implementation's Cancel() hook, marks the context complete and
+/// replies with success.
+void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    // The zero-argument overload is the hook provided by the derived class.
+    this->Cancel();
+    this->MarkComplete();
+
+    IPC::ResponseBuilder response{ctx, 2};
+    response.Push(ResultSuccess);
+}
+
+/// IPC command 2: refreshes is_complete from the IsComplete() hook and reports it after the
+/// success result.
+void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    const bool done_now = IsComplete();
+    is_complete.store(done_now);
+
+    IPC::ResponseBuilder response{ctx, 3};
+    response.Push(ResultSuccess);
+    // Report the stored flag, re-read at the point of the reply.
+    response.Push(is_complete.load());
+}
+
+/// IPC command 3: replies with the result code supplied by the implementation's GetResult()
+/// hook and nothing else.
+void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    // Only a result code is pushed, so the response is sized like the other result-only
+    // replies in this file (2); a size of 3 is used only when an extra value follows.
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(GetResult());
+}
+
+/// Sets the completion flag and signals the completion event.
+void IAsyncContext::MarkComplete() {
+    is_complete.store(true);
+
+    auto&& writable_event = compeletion_event.GetWritableEvent();
+    writable_event.Signal();
+}
+
+} // namespace Service::Account
diff --git a/src/core/hle/service/acc/async_context.h b/src/core/hle/service/acc/async_context.h
new file mode 100644
index 000000000..c694b4946
--- /dev/null
+++ b/src/core/hle/service/acc/async_context.h
@@ -0,0 +1,37 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include "core/hle/kernel/k_event.h"
+#include "core/hle/service/service.h"
+
+namespace Core {
+class System;
+}
+
+namespace Service::Account {
+
+/// Base class for the "IAsyncContext" service interface. Derived classes provide the
+/// IsComplete/Cancel/GetResult hooks; this class owns the completion flag and event.
+class IAsyncContext : public ServiceFramework<IAsyncContext> {
+public:
+    explicit IAsyncContext(Core::System& system_);
+
+    // IPC command handlers.
+    void GetSystemEvent(Kernel::HLERequestContext& ctx);
+    void Cancel(Kernel::HLERequestContext& ctx);
+    void HasDone(Kernel::HLERequestContext& ctx);
+    void GetResult(Kernel::HLERequestContext& ctx);
+
+protected:
+    // Hooks implemented by the concrete operation.
+    virtual bool IsComplete() const = 0;
+    virtual void Cancel() = 0;
+    virtual ResultCode GetResult() const = 0;
+
+    // Sets is_complete and signals the completion event.
+    void MarkComplete();
+
+protected:
+    // Shared state, kept in the original declaration order.
+    std::atomic<bool> is_complete{false};
+    Kernel::KEvent compeletion_event;
+};
+
+} // namespace Service::Account
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index a538f82e3..c3ac73131 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1270,7 +1270,8 @@ void ILibraryAppletCreator::CreateHandleStorage(Kernel::HLERequestContext& ctx)
IApplicationFunctions::IApplicationFunctions(Core::System& system_)
: ServiceFramework{system_, "IApplicationFunctions"}, gpu_error_detected_event{system.Kernel()},
friend_invitation_storage_channel_event{system.Kernel()},
- health_warning_disappeared_system_event{system.Kernel()} {
+ notification_storage_channel_event{system.Kernel()}, health_warning_disappeared_system_event{
+ system.Kernel()} {
// clang-format off
static const FunctionInfo functions[] = {
{1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1322,7 +1323,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{131, nullptr, "SetDelayTimeToAbortOnGpuError"},
{140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
{141, &IApplicationFunctions::TryPopFromFriendInvitationStorageChannel, "TryPopFromFriendInvitationStorageChannel"},
- {150, nullptr, "GetNotificationStorageChannelEvent"},
+ {150, &IApplicationFunctions::GetNotificationStorageChannelEvent, "GetNotificationStorageChannelEvent"},
{151, nullptr, "TryPopFromNotificationStorageChannel"},
{160, &IApplicationFunctions::GetHealthWarningDisappearedSystemEvent, "GetHealthWarningDisappearedSystemEvent"},
{170, nullptr, "SetHdcpAuthenticationActivated"},
@@ -1340,11 +1341,14 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
Kernel::KAutoObject::Create(std::addressof(gpu_error_detected_event));
Kernel::KAutoObject::Create(std::addressof(friend_invitation_storage_channel_event));
+ Kernel::KAutoObject::Create(std::addressof(notification_storage_channel_event));
Kernel::KAutoObject::Create(std::addressof(health_warning_disappeared_system_event));
gpu_error_detected_event.Initialize("IApplicationFunctions:GpuErrorDetectedSystemEvent");
friend_invitation_storage_channel_event.Initialize(
"IApplicationFunctions:FriendInvitationStorageChannelEvent");
+ notification_storage_channel_event.Initialize(
+ "IApplicationFunctions:NotificationStorageChannelEvent");
health_warning_disappeared_system_event.Initialize(
"IApplicationFunctions:HealthWarningDisappearedSystemEvent");
}
@@ -1762,6 +1766,14 @@ void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(
rb.Push(ERR_NO_DATA_IN_CHANNEL);
}
+/// Replies with success and the readable side of notification_storage_channel_event.
+void IApplicationFunctions::GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called");
+
+    IPC::ResponseBuilder response{ctx, 2, 1};
+    response.Push(ResultSuccess);
+    response.PushCopyObjects(notification_storage_channel_event.GetReadableEvent());
+}
+
void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 184030a8e..c13aa5787 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -295,6 +295,7 @@ private:
void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
void GetFriendInvitationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx);
+ void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx);
bool launch_popped_application_specific = false;
@@ -302,6 +303,7 @@ private:
s32 previous_program_index{-1};
Kernel::KEvent gpu_error_detected_event;
Kernel::KEvent friend_invitation_storage_channel_event;
+ Kernel::KEvent notification_storage_channel_event;
Kernel::KEvent health_warning_disappeared_system_event;
};
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 4a9b13e45..c8d65f328 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -97,14 +97,24 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons
ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const {
std::string path(Common::FS::SanitizePath(path_));
- auto dir = GetDirectoryRelativeWrapped(backing, Common::FS::GetParentPath(path));
- if (dir == nullptr || Common::FS::GetFilename(Common::FS::GetParentPath(path)).empty()) {
- dir = backing;
- }
- auto new_dir = dir->CreateSubdirectory(Common::FS::GetFilename(path));
- if (new_dir == nullptr) {
- // TODO(DarkLordZach): Find a better error code for this
- return ResultUnknown;
+
+ // NOTE: This is inaccurate behavior. CreateDirectory is not recursive.
+ // CreateDirectory should return PathNotFound if the parent directory does not exist.
+ // This is here temporarily in order to have UMM "work" in the meantime.
+ // TODO (Morph): Remove this when a hardware test verifies the correct behavior.
+ const auto components = Common::FS::SplitPathComponents(path);
+ std::string relative_path;
+ for (const auto& component : components) {
+ // Skip empty path components
+ if (component.empty()) {
+ continue;
+ }
+ relative_path = Common::FS::SanitizePath(relative_path + '/' + component);
+ auto new_dir = backing->CreateSubdirectory(relative_path);
+ if (new_dir == nullptr) {
+ // TODO(DarkLordZach): Find a better error code for this
+ return ResultUnknown;
+ }
}
return ResultSuccess;
}
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index ef2becefd..8e9b40c0a 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -15,6 +15,20 @@
namespace Service::HID {
class Controller_Touchscreen final : public ControllerBase {
public:
+ enum class TouchScreenModeForNx : u8 {
+ UseSystemSetting,
+ Finger,
+ Heat2,
+ };
+
+ struct TouchScreenConfigurationForNx {
+ TouchScreenModeForNx mode;
+ INSERT_PADDING_BYTES_NOINIT(0x7);
+ INSERT_PADDING_BYTES_NOINIT(0xF); // Reserved
+ };
+ static_assert(sizeof(TouchScreenConfigurationForNx) == 0x17,
+ "TouchScreenConfigurationForNx is an invalid size");
+
explicit Controller_Touchscreen(Core::System& system_);
~Controller_Touchscreen() override;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index b8b80570d..a1707a72a 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -331,7 +331,7 @@ Hid::Hid(Core::System& system_)
{529, nullptr, "SetDisallowedPalmaConnection"},
{1000, &Hid::SetNpadCommunicationMode, "SetNpadCommunicationMode"},
{1001, &Hid::GetNpadCommunicationMode, "GetNpadCommunicationMode"},
- {1002, nullptr, "SetTouchScreenConfiguration"},
+ {1002, &Hid::SetTouchScreenConfiguration, "SetTouchScreenConfiguration"},
{1003, nullptr, "IsFirmwareUpdateNeededForNotification"},
{2000, nullptr, "ActivateDigitizer"},
};
@@ -1631,6 +1631,18 @@ void Hid::GetNpadCommunicationMode(Kernel::HLERequestContext& ctx) {
.GetNpadCommunicationMode());
}
+void Hid::SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto touchscreen_mode{rp.PopRaw<Controller_Touchscreen::TouchScreenConfigurationForNx>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_WARNING(Service_HID, "(STUBBED) called, touchscreen_mode={}, applet_resource_user_id={}",
+ touchscreen_mode.mode, applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg(Core::System& system_) : ServiceFramework{system_, "hid:dbg"} {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 9c5c7f252..b1fe75e94 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -159,6 +159,7 @@ private:
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
void SetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
+ void SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
Unknown = 0,
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index ce6065db2..a33e47d0b 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
- VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
+ const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
- using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
- const Tegra::FramebufferConfig framebuffer{
- addr, offset, width, height, stride, static_cast<PixelFormat>(format),
- transform, crop_rect};
+ const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
+ const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
+ stride, pixel_format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 59ddf6298..b4c3a6099 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -9,17 +9,20 @@
#include "core/core.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
namespace Service::NVFlinger {
-BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_)
- : id(id_), layer_id(layer_id_), buffer_wait_event{kernel} {
- Kernel::KAutoObject::Create(std::addressof(buffer_wait_event));
- buffer_wait_event.Initialize("BufferQueue:WaitEvent");
+BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
+ KernelHelpers::ServiceContext& service_context_)
+ : id(id_), layer_id(layer_id_), service_context{service_context_} {
+ buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
}
-BufferQueue::~BufferQueue() = default;
+BufferQueue::~BufferQueue() {
+ service_context.CloseEvent(buffer_wait_event);
+}
void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
ASSERT(slot < buffer_slots);
@@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
.multi_fence = {},
};
- buffer_wait_event.GetWritableEvent().Signal();
+ buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
@@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
}
free_buffers_condition.notify_one();
- buffer_wait_event.GetWritableEvent().Signal();
+ buffer_wait_event->GetWritableEvent().Signal();
}
std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
@@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
}
free_buffers_condition.notify_one();
- buffer_wait_event.GetWritableEvent().Signal();
+ buffer_wait_event->GetWritableEvent().Signal();
}
void BufferQueue::Connect() {
@@ -169,7 +172,7 @@ void BufferQueue::Disconnect() {
std::unique_lock lock{queue_sequence_mutex};
queue_sequence.clear();
}
- buffer_wait_event.GetWritableEvent().Signal();
+ buffer_wait_event->GetWritableEvent().Signal();
is_connect = false;
free_buffers_condition.notify_one();
}
@@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) {
}
Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
- return buffer_wait_event.GetWritableEvent();
+ return buffer_wait_event->GetWritableEvent();
}
Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
- return buffer_wait_event.GetReadableEvent();
+ return buffer_wait_event->GetReadableEvent();
}
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index 61e337ac5..78de3f354 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -24,6 +24,10 @@ class KReadableEvent;
class KWritableEvent;
} // namespace Kernel
+namespace Service::KernelHelpers {
+class ServiceContext;
+} // namespace Service::KernelHelpers
+
namespace Service::NVFlinger {
constexpr u32 buffer_slots = 0x40;
@@ -38,7 +42,9 @@ struct IGBPBuffer {
u32_le index;
INSERT_PADDING_WORDS(3);
u32_le gpu_buffer_id;
- INSERT_PADDING_WORDS(17);
+ INSERT_PADDING_WORDS(6);
+ u32_le external_format;
+ INSERT_PADDING_WORDS(10);
u32_le nvmap_handle;
u32_le offset;
INSERT_PADDING_WORDS(60);
@@ -54,7 +60,8 @@ public:
NativeWindowFormat = 2,
};
- explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_);
+ explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
+ KernelHelpers::ServiceContext& service_context_);
~BufferQueue();
enum class BufferTransformFlags : u32 {
@@ -130,12 +137,14 @@ private:
std::list<u32> free_buffers;
std::array<Buffer, buffer_slots> buffers;
std::list<u32> queue_sequence;
- Kernel::KEvent buffer_wait_event;
+ Kernel::KEvent* buffer_wait_event{};
std::mutex free_buffers_mutex;
std::condition_variable free_buffers_condition;
std::mutex queue_sequence_mutex;
+
+ KernelHelpers::ServiceContext& service_context;
};
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 941748970..3ead813b0 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() {
}
}
-NVFlinger::NVFlinger(Core::System& system_) : system(system_) {
- displays.emplace_back(0, "Default", system);
- displays.emplace_back(1, "External", system);
- displays.emplace_back(2, "Edid", system);
- displays.emplace_back(3, "Internal", system);
- displays.emplace_back(4, "Null", system);
+NVFlinger::NVFlinger(Core::System& system_)
+ : system(system_), service_context(system_, "nvflinger") {
+ displays.emplace_back(0, "Default", service_context, system);
+ displays.emplace_back(1, "External", service_context, system);
+ displays.emplace_back(2, "Edid", service_context, system);
+ displays.emplace_back(3, "Internal", service_context, system);
+ displays.emplace_back(4, "Null", service_context, system);
guard = std::make_shared<std::mutex>();
// Schedule the screen composition events
@@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
const u32 buffer_queue_id = next_buffer_queue_id++;
buffer_queues.emplace_back(
- std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
+ std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context));
display.CreateLayer(layer_id, *buffer_queues.back());
}
@@ -297,7 +298,7 @@ void NVFlinger::Compose() {
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
ASSERT(nvdisp);
- nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
+ nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index d80fd07ef..6d84cafb4 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -15,6 +15,7 @@
#include <vector>
#include "common/common_types.h"
+#include "core/hle/service/kernel_helpers.h"
namespace Common {
class Event;
@@ -135,6 +136,8 @@ private:
std::unique_ptr<std::thread> vsync_thread;
std::unique_ptr<Common::Event> wait_event;
std::atomic<bool> is_running{};
+
+ KernelHelpers::ServiceContext service_context;
};
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index 0dd342dbf..b7705c02a 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -12,18 +12,21 @@
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
-Display::Display(u64 id, std::string name_, Core::System& system)
- : display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} {
- Kernel::KAutoObject::Create(std::addressof(vsync_event));
- vsync_event.Initialize(fmt::format("Display VSync Event {}", id));
+Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
+ Core::System& system_)
+ : display_id{id}, name{std::move(name_)}, service_context{service_context_} {
+ vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id));
}
-Display::~Display() = default;
+Display::~Display() {
+ service_context.CloseEvent(vsync_event);
+}
Layer& Display::GetLayer(std::size_t index) {
return *layers.at(index);
@@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const {
}
Kernel::KReadableEvent& Display::GetVSyncEvent() {
- return vsync_event.GetReadableEvent();
+ return vsync_event->GetReadableEvent();
}
void Display::SignalVSyncEvent() {
- vsync_event.GetWritableEvent().Signal();
+ vsync_event->GetWritableEvent().Signal();
}
void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) {
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 166f2a4cc..0979fc421 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -18,6 +18,9 @@ class KEvent;
namespace Service::NVFlinger {
class BufferQueue;
}
+namespace Service::KernelHelpers {
+class ServiceContext;
+} // namespace Service::KernelHelpers
namespace Service::VI {
@@ -31,10 +34,13 @@ class Display {
public:
/// Constructs a display with a given unique ID and name.
///
- /// @param id The unique ID for this display.
+ /// @param id The unique ID for this display.
+ /// @param service_context_ The ServiceContext for the owning service.
/// @param name_ The name for this display.
+ /// @param system_ The global system instance.
///
- Display(u64 id, std::string name_, Core::System& system);
+ Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
+ Core::System& system_);
~Display();
/// Gets the unique ID assigned to this display.
@@ -98,9 +104,10 @@ public:
private:
u64 display_id;
std::string name;
+ KernelHelpers::ServiceContext& service_context;
std::vector<std::shared_ptr<Layer>> layers;
- Kernel::KEvent vsync_event;
+ Kernel::KEvent* vsync_event{};
};
} // namespace Service::VI
diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp
index 4732d4485..72eea52f0 100644
--- a/src/core/network/network.cpp
+++ b/src/core/network/network.cpp
@@ -7,7 +7,8 @@
#include <limits>
#include <utility>
#include <vector>
-#include "common/common_funcs.h"
+
+#include "common/error.h"
#ifdef _WIN32
#include <winsock2.h>
@@ -223,7 +224,7 @@ Errno GetAndLogLastError() {
if (err == Errno::AGAIN) {
return err;
}
- LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e));
+ LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
return err;
}
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 5a8cfd301..1f1607998 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
return "Unknown";
}
+static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
+ switch (backend) {
+ case Settings::NvdecEmulation::Off:
+ return "Off";
+ case Settings::NvdecEmulation::CPU:
+ return "CPU";
+ case Settings::NvdecEmulation::GPU:
+ return "GPU";
+ }
+ return "Unknown";
+}
+
u64 GetTelemetryId() {
u64 telemetry_id{};
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation.GetValue());
- AddField(field_type, "Renderer_UseNvdecEmulation",
- Settings::values.use_nvdec_emulation.GetValue());
+ AddField(field_type, "Renderer_NvdecEmulation",
+ TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
AddField(field_type, "Renderer_ShaderBackend",
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index a982dd8a2..cd285e2c8 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -11,6 +11,8 @@
namespace Shader::Backend::GLSL {
namespace {
+constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
+
void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
SetInBoundsFlag(ctx, inst);
}
+
+std::string_view BallotIndex(EmitContext& ctx) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ".x";
+ }
+ return "[gl_SubGroupInvocationARB>>5]";
+}
+
+std::string GetMask(EmitContext& ctx, std::string_view mask) {
+ const auto ballot_index{BallotIndex(ctx)};
+ return fmt::format("uint(uvec2({}){})", mask, ballot_index);
+}
} // Anonymous namespace
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
+ ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
- ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ // VoteAll must only pass when the predicate is true on every invocation;
+ // allInvocationsEqualARB would also pass when it is uniformly false, which
+ // disagrees with the ballot-based path below.
+ ctx.AddU1("{}=allInvocationsARB({});", inst, pred);
- } else {
- const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
- const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
- ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+ return;
}
+ // Host subgroup may be wider than the guest warp: compare only the ballot word
+ // selected by BallotIndex against the active-invocation mask.
+ const auto ballot_index{BallotIndex(ctx)};
+ const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+ const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+ ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
- } else {
- const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
- const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
- ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+ return;
}
+ // Host subgroup may be wider than the guest warp: test the ballot word selected
+ // by BallotIndex, masked to the active invocations.
+ const auto ballot_index{BallotIndex(ctx)};
+ const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+ const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+ // Three placeholders, three arguments (the mask is only referenced once here).
+ ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask);
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
- } else {
- const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
- const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
- const auto value{fmt::format("({}^{})", ballot, active_mask)};
- ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+ return;
}
+ const auto ballot_index{BallotIndex(ctx)};
+ const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+ const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+ const auto value{fmt::format("({}^{})", ballot, active_mask)};
+ ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
- if (!ctx.profile.warp_size_potentially_larger_than_guest) {
- ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
- } else {
- ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
- }
+ const auto ballot_index{BallotIndex(ctx)};
+ ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
+ ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
}
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
+ ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
}
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
+ ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
}
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
+ ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
}
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
- ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
+ ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
}
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
- std::string_view index, std::string_view clamp,
- std::string_view segmentation_mask) {
+ std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
- UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
+ UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
return;
}
- const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
- const auto thread_id{"gl_SubGroupInvocationARB"};
- const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
- const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
+ const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+ const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+ const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
+ const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+ const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
+ const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
+ const auto max_thread_id{
+ ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
- const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
+ const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
- std::string_view clamp, std::string_view segmentation_mask) {
+ std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
- UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
+ UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
return;
}
- const auto thread_id{"gl_SubGroupInvocationARB"};
- const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
- const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
+ const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+ const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+ const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+ const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+ const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
- std::string_view index, std::string_view clamp,
- std::string_view segmentation_mask) {
+ std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
- UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
+ UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
return;
}
- const auto thread_id{"gl_SubGroupInvocationARB"};
- const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
- const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
+ const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+ const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+ const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+ const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+ const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
- std::string_view segmentation_mask) {
+ std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
- UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
+ UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
return;
}
- const auto thread_id{"gl_SubGroupInvocationARB"};
- const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
- const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
+ const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+ const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+ const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+ const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+ const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2d29d8c14..2885e6799 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,8 @@
namespace Shader::Backend::SPIRV {
namespace {
+constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
+
enum class Operation {
Increment,
Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
return pointer_type;
}
}
+
+size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
+ size_t start_offset) {
+ for (size_t location = start_offset; location < used_locations.size(); ++location) {
+ if (!used_locations.test(location)) {
+ return location;
+ }
+ }
+ throw RuntimeError("Unable to get an unused location for legacy attribute");
+}
} // Anonymous namespace
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
loads[IR::Attribute::TessellationEvaluationPointV]) {
tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
}
+ std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
const AttributeType input_type{runtime_info.generic_input_types[index]};
if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (input_type == AttributeType::Disabled) {
continue;
}
+ used_locations.set(index);
const Id type{GetAttributeType(*this, input_type)};
const Id id{DefineInput(*this, type, true)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
break;
}
}
+ // Legacy attributes (front colour, fixed-function texture coordinates) are each given
+ // the next location not already claimed by a generic input. The search resumes from
+ // the last location handed out.
+ size_t previous_unused_location = 0;
+ if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+ previous_unused_location = location;
+ used_locations.set(location);
+ const Id id{DefineInput(*this, F32[4], true)};
+ // Pass the location as an explicit u32, matching the generic-input decoration above.
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+ input_front_color = id;
+ }
+ for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+ if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+ const size_t location =
+ FindNextUnusedLocation(used_locations, previous_unused_location);
+ previous_unused_location = location;
+ used_locations.set(location);
+ const Id id{DefineInput(*this, F32[4], true)};
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+ input_fixed_fnc_textures[index] = id;
+ }
+ }
if (stage == Stage::TessellationEval) {
for (size_t index = 0; index < info.uses_patches.size(); ++index) {
if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
spv::BuiltIn::ViewportMaskNV);
}
+ std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) {
DefineGenericOutput(*this, index, invocations);
+ used_locations.set(index);
+ }
+ }
+ size_t previous_unused_location = 0;
+ if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+ previous_unused_location = location;
+ used_locations.set(location);
+ const Id id{DefineOutput(*this, F32[4], invocations)};
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+ output_front_color = id;
+ }
+ // One vec4 output per stored fixed-function texture coordinate set, each placed at
+ // the next location not already used by a generic output or the front colour.
+ for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+ if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+ const size_t location =
+ FindNextUnusedLocation(used_locations, previous_unused_location);
+ previous_unused_location = location;
+ used_locations.set(location);
+ const Id id{DefineOutput(*this, F32[4], invocations)};
+ // Pass the location as an explicit u32, matching the front-colour decoration above.
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+ output_fixed_fnc_textures[index] = id;
}
}
switch (stage) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e277bc358..847d0c0e6 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -268,10 +268,14 @@ public:
Id write_global_func_u32x4{};
Id input_position{};
+ Id input_front_color{};
+ std::array<Id, 10> input_fixed_fnc_textures{};
std::array<Id, 32> input_generics{};
Id output_point_size{};
Id output_position{};
+ Id output_front_color{};
+ std::array<Id, 10> output_fixed_fnc_textures{};
std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
Id output_tess_level_outer{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 9e54a17ee..68f360b3c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
}
}
+bool IsFixedFncTexture(IR::Attribute attribute) {
+ return attribute >= IR::Attribute::FixedFncTexture0S &&
+ attribute <= IR::Attribute::FixedFncTexture9Q;
+}
+
+u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
+ if (!IsFixedFncTexture(attribute)) {
+ throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+ }
+ return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
+}
+
+u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
+ if (!IsFixedFncTexture(attribute)) {
+ throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+ }
+ return static_cast<u32>(attribute) % 4u;
+}
+
template <typename... Args>
Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
}
}
+ if (IsFixedFncTexture(attr)) {
+ const u32 index{FixedFncTextureAttributeIndex(attr)};
+ const u32 element{FixedFncTextureAttributeElement(attr)};
+ const Id element_id{ctx.Const(element)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
+ element_id);
+ }
switch (attr) {
case IR::Attribute::PointSize:
return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
}
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA: {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const Id element_id{ctx.Const(element)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
+ }
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:
@@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const Id value{ctx.OpLoad(type->id, pointer)};
return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
+ if (IsFixedFncTexture(attr)) {
+ const u32 index{FixedFncTextureAttributeIndex(attr)};
+ const Id attr_id{ctx.input_fixed_fnc_textures[index]};
+ const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
+ return ctx.OpLoad(ctx.F32[1], attr_ptr);
+ }
switch (attr) {
case IR::Attribute::PrimitiveId:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA: {
+ return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
+ ctx.Const(element)));
+ }
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 78b1e1ba7..cef52c56e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,8 +7,13 @@
namespace Shader::Backend::SPIRV {
namespace {
+Id GetThreadId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
+}
+
Id WarpExtract(EmitContext& ctx, Id value) {
- const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id thread_id{GetThreadId(ctx)};
+ const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
}
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
return ctx.OpSelect(ctx.U32[1], in_range,
ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
}
+
+Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
+ const Id thirty_two{ctx.Const(32u)};
+ const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
+ const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+ return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
- const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
}
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
- const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id thread_id{GetThreadId(ctx)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ const Id thirty_two{ctx.Const(32u)};
+ const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
+ const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
+ const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+ index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
+ clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+ }
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id thread_id{GetThreadId(ctx)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ clamp = GetUpperClamp(ctx, thread_id, clamp);
+ }
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id thread_id{GetThreadId(ctx)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ clamp = GetUpperClamp(ctx, thread_id, clamp);
+ }
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id thread_id{GetThreadId(ctx)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ clamp = GetUpperClamp(ctx, thread_id, clamp);
+ }
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2f6cdd216..269db21a5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -231,6 +231,7 @@ endif()
target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
+target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index f798a0053..61966cbfe 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -5,6 +5,7 @@
#include <fstream>
#include <vector>
#include "common/assert.h"
+#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h"
@@ -16,108 +17,146 @@ extern "C" {
}
namespace Tegra {
-#if defined(LIBVA_FOUND)
-// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace {
-constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
- "i915",
- "amdgpu",
-};
+constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
+constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
+
+void AVPacketDeleter(AVPacket* ptr) {
+ av_packet_free(&ptr);
+}
-AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
+using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
+
+AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
- if (*p == AV_PIX_FMT_VAAPI) {
- return AV_PIX_FMT_VAAPI;
+ if (*p == av_codec_ctx->pix_fmt) {
+ return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
- return *pix_fmts;
+ av_buffer_unref(&av_codec_ctx->hw_device_ctx);
+ av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
+ return PREFERRED_CPU_FMT;
+}
+} // namespace
+
+void AVFrameDeleter(AVFrame* ptr) {
+ av_frame_free(&ptr);
}
-bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+ : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+ vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
+
+Codec::~Codec() {
+ if (!initialized) {
+ return;
+ }
+ // Free libav memory
+ avcodec_free_context(&av_codec_ctx);
+ av_buffer_unref(&av_gpu_decoder);
+}
+
+bool Codec::CreateGpuAvDevice() {
+#if defined(LIBVA_FOUND)
+ static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
+ "i915",
+ "iHD",
+ "amdgpu",
+ };
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
- const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
+ const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
+ av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
- return false;
-}
-} // namespace
#endif
-
-void AVFrameDeleter(AVFrame* ptr) {
- av_frame_free(&ptr);
+ static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
+ static constexpr std::array GPU_DECODER_TYPES{
+ AV_HWDEVICE_TYPE_CUDA,
+#ifdef _WIN32
+ AV_HWDEVICE_TYPE_D3D11VA,
+#else
+ AV_HWDEVICE_TYPE_VDPAU,
+#endif
+ };
+ for (const auto& type : GPU_DECODER_TYPES) {
+ const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
+ if (hwdevice_res < 0) {
+ LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
+ av_hwdevice_get_type_name(type), hwdevice_res);
+ continue;
+ }
+ for (int i = 0;; i++) {
+ const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
+ if (!config) {
+ LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
+ av_codec->name, av_hwdevice_get_type_name(type));
+ break;
+ }
+ if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
+ av_codec_ctx->pix_fmt = config->pix_fmt;
+ LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+ return true;
+ }
+ }
+ }
+ return false;
}
-Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
- : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
- vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
-
-Codec::~Codec() {
- if (!initialized) {
- return;
- }
- // Free libav memory
- avcodec_send_packet(av_codec_ctx, nullptr);
- AVFrame* av_frame = av_frame_alloc();
- avcodec_receive_frame(av_codec_ctx, av_frame);
- avcodec_flush_buffers(av_codec_ctx);
- av_frame_free(&av_frame);
- avcodec_close(av_codec_ctx);
- av_buffer_unref(&av_hw_device);
+void Codec::InitializeAvCodecContext() {
+ av_codec_ctx = avcodec_alloc_context3(av_codec);
+ av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
}
-void Codec::InitializeHwdec() {
- // Prioritize integrated GPU to mitigate bandwidth bottlenecks
-#if defined(LIBVA_FOUND)
- if (CreateVaapiHwdevice(&av_hw_device)) {
- const auto hw_device_ctx = av_buffer_ref(av_hw_device);
- ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
- av_codec_ctx->hw_device_ctx = hw_device_ctx;
- av_codec_ctx->get_format = GetHwFormat;
+void Codec::InitializeGpuDecoder() {
+ if (!CreateGpuAvDevice()) {
+ av_buffer_unref(&av_gpu_decoder);
return;
}
-#endif
- // TODO more GPU accelerated decoders
+ auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
+ ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
+ av_codec_ctx->hw_device_ctx = hw_device_ctx;
+ av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::Initialize() {
- AVCodecID codec;
- switch (current_codec) {
- case NvdecCommon::VideoCodec::H264:
- codec = AV_CODEC_ID_H264;
- break;
- case NvdecCommon::VideoCodec::Vp9:
- codec = AV_CODEC_ID_VP9;
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+ const AVCodecID codec = [&] {
+ switch (current_codec) {
+ case NvdecCommon::VideoCodec::H264:
+ return AV_CODEC_ID_H264;
+ case NvdecCommon::VideoCodec::Vp9:
+ return AV_CODEC_ID_VP9;
+ default:
+ UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+ return AV_CODEC_ID_NONE;
+ }
+ }();
+ av_codec = avcodec_find_decoder(codec);
+
+ InitializeAvCodecContext();
+ if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
+ InitializeGpuDecoder();
+ }
+ if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
+ LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
+ avcodec_free_context(&av_codec_ctx);
+ av_buffer_unref(&av_gpu_decoder);
return;
}
- av_codec = avcodec_find_decoder(codec);
- av_codec_ctx = avcodec_alloc_context3(av_codec);
- av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
- InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
- const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
- if (av_error < 0) {
- LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
- avcodec_close(av_codec_ctx);
- av_buffer_unref(&av_hw_device);
- return;
- }
initialized = true;
}
@@ -133,6 +172,9 @@ void Codec::Decode() {
if (is_first_frame) {
Initialize();
}
+ if (!initialized) {
+ return;
+ }
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
@@ -141,50 +183,48 @@ void Codec::Decode() {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
- AVPacket packet{};
- av_init_packet(&packet);
- packet.data = frame_data.data();
- packet.size = static_cast<s32>(frame_data.size());
- if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
- LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
+ AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
+ if (!packet) {
+ LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
+ return;
+ }
+ packet->data = frame_data.data();
+ packet->size = static_cast<s32>(frame_data.size());
+ if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
+ LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) {
return;
}
- AVFrame* hw_frame = av_frame_alloc();
- AVFrame* sw_frame = hw_frame;
- ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
- if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
+ AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
+ AVFramePtr final_frame{nullptr, AVFrameDeleter};
+ ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
+ if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
- av_frame_free(&hw_frame);
return;
}
- if (!hw_frame->width || !hw_frame->height) {
+ if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
- av_frame_free(&hw_frame);
return;
}
-#if defined(LIBVA_FOUND)
- // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
- if (hw_frame->format == AV_PIX_FMT_VAAPI) {
- sw_frame = av_frame_alloc();
- ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
+ if (av_codec_ctx->hw_device_ctx) {
+ final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
+ ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
- sw_frame->format = AV_PIX_FMT_NV12;
- const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
- ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
- av_frame_free(&hw_frame);
+ final_frame->format = PREFERRED_GPU_FMT;
+ const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
+ ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
+ } else {
+ final_frame = std::move(initial_frame);
}
-#endif
- if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
- UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
- av_frame_free(&sw_frame);
+ if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
+ UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
- av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
+ av_frames.push(std::move(final_frame));
if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 71936203f..f9a80886f 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -5,6 +5,7 @@
#pragma once
#include <memory>
+#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -50,18 +51,23 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
- void InitializeHwdec();
+ void InitializeAvCodecContext();
+
+ void InitializeGpuDecoder();
+
+ bool CreateGpuAvDevice();
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
- AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
+ AVBufferRef* av_gpu_decoder{nullptr};
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 5fb6d45ee..51ee14c13 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
- writer.WriteUe(16);
+ // TODO (ameerj): Where do we get this number, it seems to be particular for each stream
+ writer.WriteUe(6); // Max number of reference frames
writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1aa43523a..7f4ca6282 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -475,10 +475,10 @@ public:
// These values are used by Nouveau and some games.
AddGL = 0x8006,
- SubtractGL = 0x8007,
- ReverseSubtractGL = 0x8008,
- MinGL = 0x800a,
- MaxGL = 0x800b
+ MinGL = 0x8007,
+ MaxGL = 0x8008,
+ SubtractGL = 0x800a,
+ ReverseSubtractGL = 0x800b
};
enum class Factor : u32 {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c60ed6453..dce00e829 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7c9b0d6db..9ff0a28cd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
- scheduler.Flush(render_semaphore);
+ const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
+ scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index cb0580182..888bc7392 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {
void VKBlitScreen::CreateRenderPass() {
const VkAttachmentDescription color_attachment{
.flags = 0,
- .format = swapchain.GetImageFormat(),
+ .format = swapchain.GetImageViewFormat(),
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 8e77e4796..d87da2a34 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <mutex>
#include <span>
#include <vector>
@@ -18,7 +19,6 @@ namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
constexpr size_t SETS_GROW_RATE = 16;
constexpr s32 SCORE_THRESHOLD = 3;
-constexpr u32 SETS_PER_POOL = 64;
struct DescriptorBank {
DescriptorBankInfo info;
@@ -58,11 +58,12 @@ static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
static void AllocatePool(const Device& device, DescriptorBank& bank) {
std::array<VkDescriptorPoolSize, 6> pool_sizes;
size_t pool_cursor{};
+ const u32 sets_per_pool = device.GetSetsPerPool();
const auto add = [&](VkDescriptorType type, u32 count) {
if (count > 0) {
pool_sizes[pool_cursor++] = {
.type = type,
- .descriptorCount = count * SETS_PER_POOL,
+ .descriptorCount = count * sets_per_pool,
};
}
};
@@ -77,7 +78,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
- .maxSets = SETS_PER_POOL,
+ .maxSets = sets_per_pool,
.poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
}));
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 3ac18ea54..3bcd6d6cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() {
};
const u32 color_attachment = regs.clear_buffers.RT;
- const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
- const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
- if (use_color && is_color_rt) {
+ if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
@@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() {
return;
}
VkImageAspectFlags aspect_flags = 0;
- if (use_depth) {
+ if (use_depth && framebuffer->HasAspectDepthBit()) {
aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
- if (use_stencil) {
+ if (use_stencil && framebuffer->HasAspectStencilBit()) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
+ if (aspect_flags == 0) {
+ return;
+ }
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
if (regs.stencil_two_side_enable) {
- scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
- cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
- MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
- MaxwellToVK::ComparisonOp(compare));
- });
- } else {
+ // Separate stencil op per face
const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
@@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
MaxwellToVK::StencilOp(back_zfail),
MaxwellToVK::ComparisonOp(back_compare));
});
+ } else {
+ // Front face defines the stencil op of both faces
+ scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
+ MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+ MaxwellToVK::ComparisonOp(compare));
+ });
}
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 4840962de..1d438787a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-void VKScheduler::Flush(VkSemaphore semaphore) {
- SubmitExecution(semaphore);
+void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+ SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
-void VKScheduler::Finish(VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
const u64 presubmit_tick = CurrentTick();
- SubmitExecution(semaphore);
+ SubmitExecution(signal_semaphore, wait_semaphore);
WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
@@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
});
}
-void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
+void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
- Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+ Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
-
- const u32 num_signal_semaphores = semaphore ? 2U : 1U;
-
- const u64 wait_value = signal_value - 1;
- const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+
+ const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, semaphore};
+ const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+
+ const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
+ const std::array wait_values{signal_value - 1, u64(1)};
+ const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ };
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.pNext = nullptr,
- .waitSemaphoreValueCount = 1,
- .pWaitSemaphoreValues = &wait_value,
+ .waitSemaphoreValueCount = num_wait_semaphores,
+ .pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
- .waitSemaphoreCount = 1,
- .pWaitSemaphores = &timeline_semaphore,
- .pWaitDstStageMask = &wait_stage_mask,
+ .waitSemaphoreCount = num_wait_semaphores,
+ .pWaitSemaphores = wait_semaphores.data(),
+ .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cf39a2363..759ed5a48 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,10 +34,10 @@ public:
~VKScheduler();
/// Sends the current execution context to the GPU.
- void Flush(VkSemaphore semaphore = nullptr);
+ void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(VkSemaphore semaphore = nullptr);
+ void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -191,7 +191,7 @@ private:
void AllocateWorkerCommandBuffer();
- void SubmitExecution(VkSemaphore semaphore);
+ void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 5f78f6950..d90935f52 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -110,10 +110,6 @@ public:
return Exchange(Dirty::DepthTestEnable, false);
}
- bool TouchDepthBoundsEnable() {
- return Exchange(Dirty::DepthBoundsEnable, false);
- }
-
bool TouchDepthWriteEnable() {
return Exchange(Dirty::DepthWriteEnable, false);
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index d990eefba..aadf03cb0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -20,16 +20,15 @@ namespace Vulkan {
namespace {
-VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
+VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
VkSurfaceFormatKHR format;
format.format = VK_FORMAT_B8G8R8A8_UNORM;
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
return format;
}
- const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
- const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
- return format.format == request_format &&
+ const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+ return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
});
return found != formats.end() ? *found : formats[0];
@@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {
}
void VKSwapchain::Present(VkSemaphore render_semaphore) {
- const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
- const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
- .waitSemaphoreCount = render_semaphore ? 2U : 1U,
- .pWaitSemaphores = semaphores.data(),
+ .waitSemaphoreCount = render_semaphore ? 1U : 0U,
+ .pWaitSemaphores = &render_semaphore,
.swapchainCount = 1,
.pSwapchains = swapchain.address(),
.pImageIndices = &image_index,
@@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
- const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
+ const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
u32 requested_image_count{capabilities.minImageCount + 1};
@@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
+ static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
+ VkImageFormatListCreateInfo format_list{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .viewFormatCount = static_cast<u32>(view_formats.size()),
+ .pViewFormats = view_formats.data(),
+ };
+ if (device.IsKhrSwapchainMutableFormatEnabled()) {
+ format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
+ swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
+ }
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
- image_format = surface_format.format;
+ image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}
void VKSwapchain::CreateSemaphores() {
@@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image_format,
+ .format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 35c2cdc14..5bce41e21 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -68,8 +68,12 @@ public:
return *image_views[index];
}
- VkFormat GetImageFormat() const {
- return image_format;
+ VkFormat GetImageViewFormat() const {
+ return image_view_format;
+ }
+
+ VkSemaphore CurrentPresentSemaphore() const {
+ return *present_semaphores[frame_index];
}
private:
@@ -96,7 +100,7 @@ private:
u32 image_index{};
u32 frame_index{};
- VkFormat image_format{};
+ VkFormat image_view_format{};
VkExtent2D extent{};
bool current_srgb{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8f4df7122..ff979a7ac 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
images[num_images] = depth_buffer->ImageHandle();
- image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
+ const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer);
+ image_ranges[num_images] = subresource_range;
samples = depth_buffer->Samples();
++num_images;
+ has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
+ has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
} else {
renderpass_key.depth_format = PixelFormat::Invalid;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 5fe6b7ba3..6d5a68bfe 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -232,6 +232,18 @@ public:
return image_ranges;
}
+ [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
+ return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+ }
+
+ [[nodiscard]] bool HasAspectDepthBit() const noexcept {
+ return has_depth;
+ }
+
+ [[nodiscard]] bool HasAspectStencilBit() const noexcept {
+ return has_stencil;
+ }
+
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
@@ -241,6 +253,8 @@ private:
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
+ bool has_depth{};
+ bool has_stencil{};
};
struct TextureCacheParams {
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 8a4581c19..81a878bb2 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <filesystem>
#include <fstream>
#include <memory>
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 6180b8c0e..74cd3c9d8 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -4,6 +4,7 @@
#pragma once
+#include <algorithm>
#include <array>
#include <bit>
#include <concepts>
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 3b575db4d..cae543a51 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
- const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
+ const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
+ const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index c7f0d26ce..2caf98c7c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -611,6 +611,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
+
+ sets_per_pool = 64;
+ if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) {
+ // AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
+ sets_per_pool = 96;
+ }
}
Device::~Device() = default;
@@ -851,6 +857,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_khr_shader_float16_int8{};
bool has_khr_workgroup_memory_explicit_layout{};
bool has_khr_pipeline_executable_properties{};
+ bool has_khr_image_format_list{};
+ bool has_khr_swapchain_mutable_format{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
@@ -900,6 +908,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
test(has_khr_workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
+ test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
+ test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
+ false);
test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
@@ -1078,6 +1089,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_pipeline_executable_properties = true;
}
}
+ if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
+ extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
+ extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
+ khr_swapchain_mutable_format = true;
+ }
if (khr_push_descriptor) {
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 234d74129..bc180a32a 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -224,6 +224,11 @@ public:
return khr_pipeline_executable_properties;
}
+ /// Returns true if VK_KHR_swapchain_mutable_format is enabled.
+ bool IsKhrSwapchainMutableFormatEnabled() const {
+ return khr_swapchain_mutable_format;
+ }
+
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
@@ -318,6 +323,10 @@ public:
return device_access_memory;
}
+ u32 GetSetsPerPool() const {
+ return sets_per_pool;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -371,6 +380,7 @@ private:
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
+ u32 sets_per_pool{}; ///< Sets per Descriptor Pool
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetic.
bool is_int8_supported{}; ///< Support for int8 arithmetic.
@@ -390,6 +400,7 @@ private:
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor.
bool khr_pipeline_executable_properties{}; ///< Support for executable properties.
+ bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 85d292bcc..8744d8e5d 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -812,7 +812,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_disk_shader_cache);
ReadGlobalSetting(Settings::values.gpu_accuracy);
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
- ReadGlobalSetting(Settings::values.use_nvdec_emulation);
+ ReadGlobalSetting(Settings::values.nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc);
ReadGlobalSetting(Settings::values.use_vsync);
ReadGlobalSetting(Settings::values.shader_backend);
@@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
Settings::values.gpu_accuracy.UsingGlobal());
WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
- WriteGlobalSetting(Settings::values.use_nvdec_emulation);
+ WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()),
+ static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)),
+ static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
+ Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc);
WriteGlobalSetting(Settings::values.use_vsync);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 9555f4498..4733227b6 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -182,5 +182,6 @@ private:
Q_DECLARE_METATYPE(Settings::CPUAccuracy);
Q_DECLARE_METATYPE(Settings::GPUAccuracy);
Q_DECLARE_METATYPE(Settings::FullscreenMode);
+Q_DECLARE_METATYPE(Settings::NvdecEmulation);
Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::ShaderBackend);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 37e896258..c594164be 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() {
ui->api_widget->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
- ui->use_nvdec_emulation->setEnabled(runtime_lock);
+ ui->nvdec_emulation_widget->setEnabled(runtime_lock);
ui->accelerate_astc->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
ui->use_asynchronous_gpu_emulation->setChecked(
Settings::values.use_asynchronous_gpu_emulation.GetValue());
- ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
ui->fullscreen_mode_combobox->setCurrentIndex(
static_cast<int>(Settings::values.fullscreen_mode.GetValue()));
+ ui->nvdec_emulation->setCurrentIndex(
+ static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
} else {
ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
ConfigurationShared::SetHighlight(ui->api_widget,
!Settings::values.renderer_backend.UsingGlobal());
+ ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation,
+ &Settings::values.nvdec_emulation);
+ ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget,
+ !Settings::values.nvdec_emulation.UsingGlobal());
+
ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
&Settings::values.fullscreen_mode);
ConfigurationShared::SetHighlight(ui->fullscreen_mode_label,
@@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
ui->use_asynchronous_gpu_emulation,
use_asynchronous_gpu_emulation);
- ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
- ui->use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
accelerate_astc);
@@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() {
if (Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
}
+ if (Settings::values.nvdec_emulation.UsingGlobal()) {
+ Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
+ }
if (Settings::values.shader_backend.UsingGlobal()) {
Settings::values.shader_backend.SetValue(shader_backend);
}
@@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() {
}
}
+ if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+ Settings::values.nvdec_emulation.SetGlobal(true);
+ } else {
+ Settings::values.nvdec_emulation.SetGlobal(false);
+ Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
+ }
+
if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.bg_red.SetGlobal(true);
Settings::values.bg_green.SetGlobal(true);
@@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
ConfigurationShared::USE_GLOBAL_OFFSET);
}
+Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const {
+ if (Settings::IsConfiguringGlobal()) {
+ return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex());
+ }
+
+ if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+ Settings::values.nvdec_emulation.SetGlobal(true);
+ return Settings::values.nvdec_emulation.GetValue();
+ }
+ Settings::values.nvdec_emulation.SetGlobal(false);
+ return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() -
+ ConfigurationShared::USE_GLOBAL_OFFSET);
+}
+
void ConfigureGraphics::SetupPerGameUI() {
if (Settings::IsConfiguringGlobal()) {
ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal());
@@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
ui->use_asynchronous_gpu_emulation->setEnabled(
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
- ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
+ ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal());
ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
@@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
- ConfigurationShared::SetColoredTristate(
- ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
accelerate_astc);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
@@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() {
static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
+ ConfigurationShared::InsertGlobalItem(
+ ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index c866b911b..7d7ac329d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -43,6 +43,7 @@ private:
void SetupPerGameUI();
Settings::RendererBackend GetCurrentGraphicsBackend() const;
+ Settings::NvdecEmulation GetCurrentNvdecEmulation() const;
std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color;
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 099ddbb7c..1a12cfa4d 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -156,7 +156,7 @@
<item>
<widget class="QCheckBox" name="use_disk_shader_cache">
<property name="text">
- <string>Use disk shader cache</string>
+ <string>Use disk pipeline cache</string>
</property>
</widget>
</item>
@@ -168,13 +168,6 @@
</widget>
</item>
<item>
- <widget class="QCheckBox" name="use_nvdec_emulation">
- <property name="text">
- <string>Use NVDEC emulation</string>
- </property>
- </widget>
- </item>
- <item>
<widget class="QCheckBox" name="accelerate_astc">
<property name="text">
<string>Accelerate ASTC texture decoding</string>
@@ -182,6 +175,50 @@
</widget>
</item>
<item>
+ <widget class="QWidget" name="nvdec_emulation_widget" native="true">
+ <layout class="QHBoxLayout" name="nvdec_emulation_layout">
+ <property name="leftMargin">
+ <number>0</number>
+ </property>
+ <property name="topMargin">
+ <number>0</number>
+ </property>
+ <property name="rightMargin">
+ <number>0</number>
+ </property>
+ <property name="bottomMargin">
+ <number>0</number>
+ </property>
+ <item>
+ <widget class="QLabel" name="nvdec_emulation_label">
+ <property name="text">
+ <string>NVDEC emulation:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="nvdec_emulation">
+ <item>
+ <property name="text">
+ <string>Disabled</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>CPU Decoding</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GPU Decoding</string>
+ </property>
+ </item>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ </item>
+ <item>
<widget class="QWidget" name="fullscreen_mode_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_1">
<property name="leftMargin">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 5891f8299..b91abc2f0 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -82,7 +82,7 @@
<string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
</property>
<property name="text">
- <string>Use asynchronous shader building (hack)</string>
+ <string>Use asynchronous shader building (Hack)</string>
</property>
</widget>
</item>
@@ -92,7 +92,7 @@
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
- <string>Use Fast GPU Time (hack)</string>
+ <string>Use Fast GPU Time (Hack)</string>
</property>
</widget>
</item>
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index e97804220..f9d949e75 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location"));
QAction* open_transferable_shader_cache =
- context_menu.addAction(tr("Open Transferable Shader Cache"));
+ context_menu.addAction(tr("Open Transferable Pipeline Cache"));
context_menu.addSeparator();
QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
- QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache"));
- QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
+ QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache"));
+ QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));
remove_menu->addSeparator();
- QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches"));
+ QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches"));
QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index e36774cc6..77d53e7bc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -3174,12 +3174,11 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv
}
bool GMainWindow::ConfirmClose() {
- if (emu_thread == nullptr || !UISettings::values.confirm_before_closing)
+ if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
return true;
-
- QMessageBox::StandardButton answer =
- QMessageBox::question(this, tr("yuzu"), tr("Are you sure you want to close yuzu?"),
- QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
+ }
+ const auto text = tr("Are you sure you want to close yuzu?");
+ const auto answer = QMessageBox::question(this, tr("yuzu"), text);
return answer != QMessageBox::No;
}
@@ -3261,14 +3260,13 @@ bool GMainWindow::ConfirmChangeGame() {
}
bool GMainWindow::ConfirmForceLockedExit() {
- if (emu_thread == nullptr)
+ if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
return true;
+ }
+ const auto text = tr("The currently running application has requested yuzu to not exit.\n\n"
+ "Would you like to bypass this and exit anyway?");
- const auto answer =
- QMessageBox::question(this, tr("yuzu"),
- tr("The currently running application has requested yuzu to not "
- "exit.\n\nWould you like to bypass this and exit anyway?"),
- QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
+ const auto answer = QMessageBox::question(this, tr("yuzu"), text);
return answer != QMessageBox::No;
}
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 757dd1ea0..891f7be6f 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -465,7 +465,7 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.disable_fps_limit);
ReadSetting("Renderer", Settings::values.shader_backend);
ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
- ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
+ ReadSetting("Renderer", Settings::values.nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index e02eceb99..72f3213fb 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -261,9 +261,9 @@ shader_backend =
# 0 (default): Off, 1: On
use_asynchronous_shaders =
-# Enable NVDEC emulation.
-# 0: Off, 1 (default): On
-use_nvdec_emulation =
+# NVDEC emulation.
+# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding
+nvdec_emulation =
# Accelerate ASTC texture decoding.
# 0: Off, 1 (default): On