summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/CMakeLists.txt9
-rw-r--r--src/common/CMakeLists.txt16
-rw-r--r--src/common/fiber.cpp8
-rw-r--r--src/common/fiber.h2
-rw-r--r--src/common/file_util.cpp31
-rw-r--r--src/common/file_util.h2
-rw-r--r--src/common/logging/backend.cpp2
-rw-r--r--src/common/misc.cpp15
-rw-r--r--src/common/stream.h12
-rw-r--r--src/common/string_util.cpp5
-rw-r--r--src/common/timer.cpp12
-rw-r--r--src/common/wall_clock.cpp2
-rw-r--r--src/common/x64/native_clock.h2
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/core.cpp10
-rw-r--r--src/core/hle/kernel/process.cpp3
-rw-r--r--src/core/hle/service/ldr/ldr.cpp16
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp51
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp135
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h20
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp2
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp17
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h14
-rw-r--r--src/core/hle/service/nvdrv/syncpoint_manager.cpp39
-rw-r--r--src/core/hle/service/nvdrv/syncpoint_manager.h85
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp4
-rw-r--r--src/input_common/gcadapter/gc_adapter.cpp462
-rw-r--r--src/input_common/gcadapter/gc_adapter.h146
-rw-r--r--src/input_common/gcadapter/gc_poller.cpp128
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp1
-rw-r--r--src/video_core/command_classes/codecs/codec.h8
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp31
-rw-r--r--src/video_core/command_classes/codecs/h264.h24
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp168
-rw-r--r--src/video_core/command_classes/codecs/vp9.h82
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h11
-rw-r--r--src/video_core/command_classes/host1x.cpp2
-rw-r--r--src/video_core/command_classes/host1x.h2
-rw-r--r--src/video_core/command_classes/nvdec.cpp6
-rw-r--r--src/video_core/command_classes/nvdec.h10
-rw-r--r--src/video_core/command_classes/sync_manager.cpp14
-rw-r--r--src/video_core/command_classes/sync_manager.h4
-rw-r--r--src/video_core/command_classes/vic.cpp2
-rw-r--r--src/video_core/command_classes/vic.h4
-rw-r--r--src/video_core/dma_pusher.cpp80
-rw-r--r--src/video_core/dma_pusher.h49
-rw-r--r--src/video_core/engines/shader_bytecode.h2
-rw-r--r--src/video_core/gpu.cpp48
-rw-r--r--src/video_core/gpu.h25
-rw-r--r--src/video_core/shader/async_shaders.cpp17
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp3
-rw-r--r--src/web_service/web_backend.cpp5
53 files changed, 1187 insertions, 673 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 71efbb40d..dbda528ce 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -32,7 +32,6 @@ if (MSVC)
# /Zc:inline - Let codegen omit inline functions in object files
# /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
add_compile_options(
- /W3
/MP
/Zi
/Zo
@@ -43,6 +42,13 @@ if (MSVC)
/Zc:externConstexpr
/Zc:inline
/Zc:throwingNew
+
+ # Warnings
+ /W3
+ /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+ /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
+ /we4555 # Expression has no effect; expected expression with side-effect
+ /we4834 # Discarding return value of function with 'nodiscard' attribute
)
# /GS- - No stack buffer overflow checks
@@ -56,6 +62,7 @@ else()
-Werror=implicit-fallthrough
-Werror=missing-declarations
-Werror=reorder
+ -Werror=unused-result
-Wextra
-Wmissing-declarations
-Wno-attributes
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e50ab2922..207c7a0a6 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -190,6 +190,22 @@ if(ARCHITECTURE_x86_64)
)
endif()
+if (MSVC)
+ target_compile_definitions(common PRIVATE
+ # The standard library doesn't provide any replacement for codecvt yet
+ # so we can disable this deprecation warning for the time being.
+ _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+ )
+ target_compile_options(common PRIVATE
+ /W4
+ /WX
+ )
+else()
+ target_compile_options(common PRIVATE
+ -Werror
+ )
+endif()
+
create_target_directory_groups(common)
find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e186ed880..b209f52fc 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -79,9 +79,9 @@ void Fiber::Exit() {
released = true;
}
-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
rewind_point = std::move(rewind_func);
- rewind_parameter = start_parameter;
+ rewind_parameter = rewind_param;
}
void Fiber::Rewind() {
@@ -161,9 +161,9 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
}
-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
rewind_point = std::move(rewind_func);
- rewind_parameter = start_parameter;
+ rewind_parameter = rewind_param;
}
Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
diff --git a/src/common/fiber.h b/src/common/fiber.h
index cefd61df9..699286ee2 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -49,7 +49,7 @@ public:
static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
[[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber();
- void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+ void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param);
void Rewind();
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 16c3713e0..18fbfa25b 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -472,13 +472,14 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
}
bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) {
- const auto callback = [recursion](u64* num_entries_out, const std::string& directory,
- const std::string& virtual_name) -> bool {
- std::string new_path = directory + DIR_SEP_CHR + virtual_name;
+ const auto callback = [recursion](u64*, const std::string& directory,
+ const std::string& virtual_name) {
+ const std::string new_path = directory + DIR_SEP_CHR + virtual_name;
if (IsDirectory(new_path)) {
- if (recursion == 0)
+ if (recursion == 0) {
return false;
+ }
return DeleteDirRecursively(new_path, recursion - 1);
}
return Delete(new_path);
@@ -492,7 +493,8 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion)
return true;
}
-void CopyDir(const std::string& source_path, const std::string& dest_path) {
+void CopyDir([[maybe_unused]] const std::string& source_path,
+ [[maybe_unused]] const std::string& dest_path) {
#ifndef _WIN32
if (source_path == dest_path) {
return;
@@ -553,7 +555,7 @@ std::optional<std::string> GetCurrentDir() {
std::string strDir = dir;
#endif
free(dir);
- return std::move(strDir);
+ return strDir;
}
bool SetCurrentDir(const std::string& directory) {
@@ -772,21 +774,23 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s
void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name,
std::array<char, 4>& extension) {
- const std::string forbidden_characters = ".\"/\\[]:;=, ";
+ static constexpr std::string_view forbidden_characters = ".\"/\\[]:;=, ";
// On a FAT32 partition, 8.3 names are stored as a 11 bytes array, filled with spaces.
short_name = {{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}};
extension = {{' ', ' ', ' ', '\0'}};
- std::string::size_type point = filename.rfind('.');
- if (point == filename.size() - 1)
+ auto point = filename.rfind('.');
+ if (point == filename.size() - 1) {
point = filename.rfind('.', point);
+ }
// Get short name.
int j = 0;
for (char letter : filename.substr(0, point)) {
- if (forbidden_characters.find(letter, 0) != std::string::npos)
+ if (forbidden_characters.find(letter, 0) != std::string::npos) {
continue;
+ }
if (j == 8) {
// TODO(Link Mauve): also do that for filenames containing a space.
// TODO(Link Mauve): handle multiple files having the same short name.
@@ -794,14 +798,15 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
short_name[7] = '1';
break;
}
- short_name[j++] = toupper(letter);
+ short_name[j++] = static_cast<char>(std::toupper(letter));
}
// Get extension.
if (point != std::string::npos) {
j = 0;
- for (char letter : filename.substr(point + 1, 3))
- extension[j++] = toupper(letter);
+ for (char letter : filename.substr(point + 1, 3)) {
+ extension[j++] = static_cast<char>(std::toupper(letter));
+ }
}
}
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 8b587320f..840cde2a6 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -232,7 +232,7 @@ public:
void Swap(IOFile& other) noexcept;
- [[nodiscard]] bool Open(const std::string& filename, const char openmode[], int flags = 0);
+ bool Open(const std::string& filename, const char openmode[], int flags = 0);
bool Close();
template <typename T>
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 62cfde397..90dfa22ca 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -274,7 +274,6 @@ const char* GetLogClassName(Class log_class) {
case Class::Count:
break;
}
- UNREACHABLE();
return "Invalid";
}
@@ -293,7 +292,6 @@ const char* GetLevelName(Level log_level) {
break;
}
#undef LVL
- UNREACHABLE();
return "Invalid";
}
diff --git a/src/common/misc.cpp b/src/common/misc.cpp
index 68cb86cd1..1d5393597 100644
--- a/src/common/misc.cpp
+++ b/src/common/misc.cpp
@@ -16,16 +16,23 @@
// Call directly after the command or use the error num.
// This function might change the error code.
std::string GetLastErrorMsg() {
- static const std::size_t buff_size = 255;
+ static constexpr std::size_t buff_size = 255;
char err_str[buff_size];
#ifdef _WIN32
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), err_str, buff_size, nullptr);
+ return std::string(err_str, buff_size);
+#elif defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600))
+ // Thread safe (GNU-specific)
+ const char* str = strerror_r(errno, err_str, buff_size);
+ return std::string(str);
#else
// Thread safe (XSI-compliant)
- strerror_r(errno, err_str, buff_size);
+ const int success = strerror_r(errno, err_str, buff_size);
+ if (success != 0) {
+ return {};
+ }
+ return std::string(err_str);
#endif
-
- return std::string(err_str, buff_size);
}
diff --git a/src/common/stream.h b/src/common/stream.h
index 2585c16af..0e40692de 100644
--- a/src/common/stream.h
+++ b/src/common/stream.h
@@ -21,6 +21,12 @@ public:
explicit Stream();
~Stream();
+ Stream(const Stream&) = delete;
+ Stream& operator=(const Stream&) = delete;
+
+ Stream(Stream&&) = default;
+ Stream& operator=(Stream&&) = default;
+
/// Reposition bitstream "cursor" to the specified offset from origin
void Seek(s32 offset, SeekOrigin origin);
@@ -30,15 +36,15 @@ public:
/// Writes byte at current position
void WriteByte(u8 byte);
- std::size_t GetPosition() const {
+ [[nodiscard]] std::size_t GetPosition() const {
return position;
}
- std::vector<u8>& GetBuffer() {
+ [[nodiscard]] std::vector<u8>& GetBuffer() {
return buffer;
}
- const std::vector<u8>& GetBuffer() const {
+ [[nodiscard]] const std::vector<u8>& GetBuffer() const {
return buffer;
}
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 84883a1d3..4cba2aaa4 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -8,6 +8,7 @@
#include <cstdlib>
#include <locale>
#include <sstream>
+
#include "common/common_paths.h"
#include "common/logging/log.h"
#include "common/string_util.h"
@@ -21,14 +22,14 @@ namespace Common {
/// Make a string lowercase
std::string ToLower(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
- [](unsigned char c) { return std::tolower(c); });
+ [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
return str;
}
/// Make a string uppercase
std::string ToUpper(std::string str) {
std::transform(str.begin(), str.end(), str.begin(),
- [](unsigned char c) { return std::toupper(c); });
+ [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
return str;
}
diff --git a/src/common/timer.cpp b/src/common/timer.cpp
index 2dc15e434..d17dc2a50 100644
--- a/src/common/timer.cpp
+++ b/src/common/timer.cpp
@@ -142,20 +142,18 @@ std::string Timer::GetTimeFormatted() {
// ----------------
double Timer::GetDoubleTime() {
// Get continuous timestamp
- u64 TmpSeconds = static_cast<u64>(Common::Timer::GetTimeSinceJan1970().count());
- double ms = static_cast<u64>(GetTimeMs().count()) % 1000;
+ auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count());
+ const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000);
// Remove a few years. We only really want enough seconds to make
// sure that we are detecting actual actions, perhaps 60 seconds is
// enough really, but I leave a year of seconds anyway, in case the
// user's clock is incorrect or something like that.
- TmpSeconds = TmpSeconds - (38 * 365 * 24 * 60 * 60);
+ tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60);
// Make a smaller integer that fits in the double
- u32 Seconds = static_cast<u32>(TmpSeconds);
- double TmpTime = Seconds + ms;
-
- return TmpTime;
+ const auto seconds = static_cast<u32>(tmp_seconds);
+ return seconds + ms;
}
} // Namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 7a20e95b7..452a2837e 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -53,7 +53,7 @@ public:
return Common::Divide128On32(temporary, 1000000000).first;
}
- void Pause(bool is_paused) override {
+ void Pause([[maybe_unused]] bool is_paused) override {
// Do nothing in this clock type.
}
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 7c503df26..97aab6ac9 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -34,7 +34,7 @@ private:
/// value used to reduce the native clocks accuracy as some apss rely on
/// undefined behavior where the level of accuracy in the clock shouldn't
/// be higher.
- static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
+ static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
SpinLock rtsc_serialize{};
u64 last_measure{};
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e0f207f3e..9a983e81d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -454,6 +454,8 @@ add_library(core STATIC
hle/service/nvdrv/nvdrv.h
hle/service/nvdrv/nvmemp.cpp
hle/service/nvdrv/nvmemp.h
+ hle/service/nvdrv/syncpoint_manager.cpp
+ hle/service/nvdrv/syncpoint_manager.h
hle/service/nvflinger/buffer_queue.cpp
hle/service/nvflinger/buffer_queue.h
hle/service/nvflinger/nvflinger.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fde2ccc09..242796008 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
arp_manager.ResetAll();
telemetry_session = std::make_unique<Core::TelemetrySession>();
+
+ gpu_core = VideoCore::CreateGPU(emu_window, system);
+ if (!gpu_core) {
+ return ResultStatus::ErrorVideoCore;
+ }
+
service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
Service::Init(service_manager, system);
GDBStub::DeferStart();
interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
- gpu_core = VideoCore::CreateGPU(emu_window, system);
- if (!gpu_core) {
- return ResultStatus::ErrorVideoCore;
- }
// Initialize time manager, which must happen after kernel is created
time_manager.Initialize();
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index ff9d9248b..b17529dee 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
#include <bitset>
+#include <ctime>
#include <memory>
#include <random>
#include "common/alignment.h"
@@ -123,7 +124,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
: kernel.CreateNewUserProcessID();
process->capabilities.InitializeForMetadatalessProcess();
- std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0));
+ std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr)));
std::uniform_int_distribution<u64> distribution;
std::generate(process->random_entropy.begin(), process->random_entropy.end(),
[&] { return distribution(rng); });
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9ad5bbf0d..eeaca44b6 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -166,7 +166,7 @@ public:
{0, &RelocatableObject::LoadNro, "LoadNro"},
{1, &RelocatableObject::UnloadNro, "UnloadNro"},
{2, &RelocatableObject::LoadNrr, "LoadNrr"},
- {3, nullptr, "UnloadNrr"},
+ {3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
{4, &RelocatableObject::Initialize, "Initialize"},
{10, nullptr, "LoadNrrEx"},
};
@@ -272,6 +272,20 @@ public:
rb.Push(RESULT_SUCCESS);
}
+ void UnloadNrr(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto pid = rp.Pop<u64>();
+ const auto nrr_address = rp.Pop<VAddr>();
+
+ LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address);
+
+ nrr.erase(nrr_address);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+
+ rb.Push(RESULT_SUCCESS);
+ }
+
bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
std::size_t size) const {
constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b27ee0502..8356a8139 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@
namespace Service::Nvidia::Devices {
-nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
- : nvdevice(system), events_interface{events_interface} {}
+nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+ SyncpointManager& syncpoint_manager)
+ : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
nvhost_ctrl::~nvhost_ctrl() = default;
u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -36,8 +37,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v
return IocCtrlEventRegister(input, output);
case IoctlCommand::IocCtrlEventUnregisterCommand:
return IocCtrlEventUnregister(input, output);
- case IoctlCommand::IocCtrlEventSignalCommand:
- return IocCtrlEventSignal(input, output);
+ case IoctlCommand::IocCtrlClearEventWaitCommand:
+ return IocCtrlClearEventWait(input, output);
default:
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::BadParameter;
}
+ if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+ params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
+ std::memcpy(output.data(), &params, sizeof(params));
+ return NvResult::Success;
+ }
+
+ if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
+ syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+ params.value = new_value;
+ std::memcpy(output.data(), &params, sizeof(params));
+ return NvResult::Success;
+ }
+
auto event = events_interface.events[event_id];
auto& gpu = system.GPU();
+
// This is mostly to take into account unimplemented features. As synced
// gpu is always synced.
if (!gpu.IsAsync()) {
- event.writable->Signal();
+ event.event.writable->Signal();
return NvResult::Success;
}
auto lock = gpu.LockSync();
- const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
+ const u32 current_syncpoint_value = event.fence.value;
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
- event.writable->Signal();
+ event.event.writable->Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
}
params.value |= event_id;
- event.writable->Clear();
+ event.event.writable->Clear();
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
if (!is_async && ctrl.fresh_call) {
ctrl.must_delay = true;
@@ -154,24 +169,22 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
return NvResult::Success;
}
-u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventSignalParams params{};
std::memcpy(&params, input.data(), sizeof(params));
- // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
- // It is believed from RE to cancel the GPU Event. However, better research is required
- u32 event_id = params.user_event_id & 0x00FF;
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
+
+ u32 event_id = params.event_id & 0x00FF;
+ LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.status[event_id] == EventState::Waiting) {
- auto& gpu = system.GPU();
- if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
- events_interface.assigned_value[event_id])) {
- events_interface.LiberateEvent(event_id);
- events_interface.events[event_id].writable->Signal();
- }
+ events_interface.LiberateEvent(event_id);
}
+
+ syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+
return NvResult::Success;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 9898623de..24ad96cb9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
class nvhost_ctrl final : public nvdevice {
public:
- explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
+ explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+ SyncpointManager& syncpoint_manager);
~nvhost_ctrl() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -31,7 +32,7 @@ private:
IocSyncptWaitexCommand = 0xC0100019,
IocSyncptReadMaxCommand = 0xC008001A,
IocGetConfigCommand = 0xC183001B,
- IocCtrlEventSignalCommand = 0xC004001C,
+ IocCtrlClearEventWaitCommand = 0xC004001C,
IocCtrlEventWaitCommand = 0xC010001D,
IocCtrlEventWaitAsyncCommand = 0xC010001E,
IocCtrlEventRegisterCommand = 0xC004001F,
@@ -94,7 +95,7 @@ private:
static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
struct IocCtrlEventSignalParams {
- u32_le user_event_id;
+ u32_le event_id;
};
static_assert(sizeof(IocCtrlEventSignalParams) == 4,
"IocCtrlEventSignalParams is incorrect size");
@@ -142,9 +143,10 @@ private:
u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
- u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
EventInterface& events_interface;
+ SyncpointManager& syncpoint_manager;
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index f1966ac0e..152019548 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,20 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {
-nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
- : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+ SyncpointManager& syncpoint_manager)
+ : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
+ channel_fence.id = syncpoint_manager.AllocateSyncpoint();
+ channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+}
+
nvhost_gpu::~nvhost_gpu() = default;
u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
params.unk3);
- auto& gpu = system.GPU();
- params.fence_out.id = assigned_syncpoints;
- params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
- assigned_syncpoints++;
+ channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+
+ params.fence_out = channel_fence;
+
std::memcpy(output.data(), &params, output.size());
return 0;
}
@@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
return 0;
}
-u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
- if (input.size() < sizeof(IoctlSubmitGpfifo)) {
- UNIMPLEMENTED();
+static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
+ return {
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+ Tegra::SubmissionMode::Increasing),
+ {fence.value},
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+ Tegra::SubmissionMode::Increasing),
+ Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+ };
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
+ std::vector<Tegra::CommandHeader> result{
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+ Tegra::SubmissionMode::Increasing),
+ {}};
+
+ for (u32 count = 0; count < add_increment; ++count) {
+ result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+ Tegra::SubmissionMode::Increasing));
+ result.emplace_back(
+ Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
}
- IoctlSubmitGpfifo params{};
- std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+ return result;
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
+ u32 add_increment) {
+ std::vector<Tegra::CommandHeader> result{
+ Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+ Tegra::SubmissionMode::Increasing),
+ {}};
+ const std::vector<Tegra::CommandHeader> increment{
+ BuildIncrementCommandList(fence, add_increment)};
+
+ result.insert(result.end(), increment.begin(), increment.end());
+
+ return result;
+}
+
+u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+ Tegra::CommandList&& entries) {
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw);
- ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
- params.num_entries * sizeof(Tegra::CommandListHeader),
- "Incorrect input size");
+ auto& gpu = system.GPU();
- Tegra::CommandList entries(params.num_entries);
- std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
- params.num_entries * sizeof(Tegra::CommandListHeader));
+ params.fence_out.id = channel_fence.id;
- UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
- UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
+ if (params.flags.add_wait.Value() &&
+ !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
+ gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+ }
- auto& gpu = system.GPU();
- u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
- if (params.flags.increment.Value()) {
- params.fence_out.value += current_syncpoint_value;
+ if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
+ const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
+ params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
+ params.fence_out.id, params.AddIncrementValue() + increment_value);
} else {
- params.fence_out.value = current_syncpoint_value;
+ params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
}
+
+ entries.RefreshIntegrityChecks(gpu);
gpu.PushGPUEntries(std::move(entries));
+ if (params.flags.add_increment.Value()) {
+ if (params.flags.suppress_wfi) {
+ gpu.PushGPUEntries(Tegra::CommandList{
+ BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+ } else {
+ gpu.PushGPUEntries(Tegra::CommandList{
+ BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+ }
+ }
+
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
return 0;
}
+u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
+ if (input.size() < sizeof(IoctlSubmitGpfifo)) {
+ UNIMPLEMENTED();
+ }
+ IoctlSubmitGpfifo params{};
+ std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+
+ Tegra::CommandList entries(params.num_entries);
+ std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
+ params.num_entries * sizeof(Tegra::CommandListHeader));
+
+ return SubmitGPFIFOImpl(params, output, std::move(entries));
+}
+
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
@@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
- LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
- params.num_entries, params.flags.raw);
Tegra::CommandList entries(params.num_entries);
if (version == IoctlVersion::Version2) {
- std::memcpy(entries.data(), input2.data(),
+ std::memcpy(entries.command_lists.data(), input2.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
} else {
- system.Memory().ReadBlock(params.address, entries.data(),
+ system.Memory().ReadBlock(params.address, entries.command_lists.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
}
- UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
- UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
-
- auto& gpu = system.GPU();
- u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
- if (params.flags.increment.Value()) {
- params.fence_out.value += current_syncpoint_value;
- } else {
- params.fence_out.value = current_syncpoint_value;
- }
- gpu.PushGPUEntries(std::move(entries));
- std::memcpy(output.data(), &params, output.size());
- return 0;
+ return SubmitGPFIFOImpl(params, output, std::move(entries));
}
u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 2ac74743f..a252fc06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "core/hle/service/nvdrv/nvdata.h"
+#include "video_core/dma_pusher.h"
+
+namespace Service::Nvidia {
+class SyncpointManager;
+}
namespace Service::Nvidia::Devices {
@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
class nvhost_gpu final : public nvdevice {
public:
- explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+ explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+ SyncpointManager& syncpoint_manager);
~nvhost_gpu() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
u32_le raw;
BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
BitField<1, 1, u32_le> add_increment; // append an increment to the list
- BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+ BitField<2, 1, u32_le> new_hw_format; // mostly ignored
+ BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
BitField<8, 1, u32_le> increment; // increment the returned fence
} flags;
Fence fence_out; // returned new fence object for others to wait on
+
+ u32 AddIncrementValue() const {
+ return flags.add_increment.Value() << 1;
+ }
};
static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
"IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+ Tegra::CommandList&& entries);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,8 @@ private:
u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
std::shared_ptr<nvmap> nvmap_dev;
- u32 assigned_syncpoints{};
+ SyncpointManager& syncpoint_manager;
+ Fence channel_fence;
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 85792495f..30f03f845 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -38,7 +38,7 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
namespace NvErrCodes {
constexpr u32 Success{};
-constexpr u32 OutOfMemory{static_cast<u32>(-12)};
+[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)};
constexpr u32 InvalidInput{static_cast<u32>(-22)};
} // namespace NvErrCodes
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 803c1a984..a46755cdc 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
#include "core/hle/service/nvdrv/interface.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvdrv/nvmemp.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/nvflinger.h"
namespace Service::Nvidia {
@@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
nvflinger.SetNVDrvInstance(module_);
}
-Module::Module(Core::System& system) {
+Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
auto& kernel = system.Kernel();
for (u32 i = 0; i < MaxNvEvents; i++) {
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
- events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
+ events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
- devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
+ devices["/dev/nvhost-gpu"] =
+ std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
devices["/dev/nvmap"] = nvmap_dev;
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
- devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
+ devices["/dev/nvhost-ctrl"] =
+ std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
@@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
if (events_interface.assigned_syncpt[i] == syncpoint_id &&
events_interface.assigned_value[i] == value) {
events_interface.LiberateEvent(i);
- events_interface.events[i].writable->Signal();
+ events_interface.events[i].event.writable->Signal();
}
}
}
std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
- return events_interface.events[event_id].readable;
+ return events_interface.events[event_id].event.readable;
}
std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
- return events_interface.events[event_id].writable;
+ return events_interface.events[event_id].event.writable;
}
} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 7706a5590..f3d863dac 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/service.h"
namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
namespace Service::Nvidia {
+class SyncpointManager;
+
namespace Devices {
class nvdevice;
}
+/// Represents an Nvidia event
+struct NvEvent {
+ Kernel::EventPair event;
+ Fence fence{};
+};
+
struct EventInterface {
// Mask representing currently busy events
u64 events_mask{};
// Each kernel event associated to an NV event
- std::array<Kernel::EventPair, MaxNvEvents> events;
+ std::array<NvEvent, MaxNvEvents> events;
// The status of the current NVEvent
std::array<EventState, MaxNvEvents> status{};
// Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
private:
+ /// Manages syncpoints on the host
+ SyncpointManager syncpoint_manager;
+
/// Id to use for the next open file descriptor.
u32 next_fd = 1;
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
new file mode 100644
index 000000000..0151a03b7
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
+#include "video_core/gpu.h"
+
+namespace Service::Nvidia {
+
+SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
+
+SyncpointManager::~SyncpointManager() = default;
+
+u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
+ syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
+ return GetSyncpointMin(syncpoint_id);
+}
+
+u32 SyncpointManager::AllocateSyncpoint() {
+ for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
+ if (!syncpoints[syncpoint_id].is_allocated) {
+ syncpoints[syncpoint_id].is_allocated = true;
+ return syncpoint_id;
+ }
+ }
+ UNREACHABLE_MSG("No more available syncpoints!");
+ return {};
+}
+
+u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
+ for (u32 index = 0; index < value; ++index) {
+ syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ return GetSyncpointMax(syncpoint_id);
+}
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h
new file mode 100644
index 000000000..4168b6c7e
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -0,0 +1,85 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Service::Nvidia {
+
+class SyncpointManager final {
+public:
+ explicit SyncpointManager(Tegra::GPU& gpu);
+ ~SyncpointManager();
+
+ /**
+ * Returns true if the specified syncpoint is expired for the given value.
+ * @param syncpoint_id Syncpoint ID to check.
+ * @param value Value to check against the specified syncpoint.
+ * @returns True if the specified syncpoint is expired for the given value, otherwise False.
+ */
+ bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
+ return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
+ }
+
+ /**
+ * Gets the lower bound for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to get the lower bound for.
+ * @returns The lower bound for the specified syncpoint.
+ */
+ u32 GetSyncpointMin(u32 syncpoint_id) const {
+ return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
+ }
+
+ /**
+ * Gets the uper bound for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to get the upper bound for.
+ * @returns The upper bound for the specified syncpoint.
+ */
+ u32 GetSyncpointMax(u32 syncpoint_id) const {
+ return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
+ }
+
+ /**
+ * Refreshes the minimum value for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to be refreshed.
+ * @returns The new syncpoint minimum value.
+ */
+ u32 RefreshSyncpoint(u32 syncpoint_id);
+
+ /**
+ * Allocates a new syncoint.
+ * @returns The syncpoint ID for the newly allocated syncpoint.
+ */
+ u32 AllocateSyncpoint();
+
+ /**
+ * Increases the maximum value for the specified syncpoint.
+ * @param syncpoint_id Syncpoint ID to be increased.
+ * @param value Value to increase the specified syncpoint by.
+ * @returns The new syncpoint maximum value.
+ */
+ u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
+
+private:
+ struct Syncpoint {
+ std::atomic<u32> min;
+ std::atomic<u32> max;
+ std::atomic<bool> is_allocated;
+ };
+
+ std::array<Syncpoint, MaxSyncPoints> syncpoints{};
+
+ Tegra::GPU& gpu;
+};
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c64673dba..44aa2bdae 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
+ if (!system.IsPoweredOn()) {
+ return; // We are likely shutting down
+ }
+
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
guard->unlock();
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index c95feb0d7..b912188b6 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -21,26 +21,6 @@
namespace GCAdapter {
-// Used to loop through and assign button in poller
-constexpr std::array<PadButton, 12> PadButtonArray{
- PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN,
- PadButton::PAD_BUTTON_UP, PadButton::PAD_TRIGGER_Z, PadButton::PAD_TRIGGER_R,
- PadButton::PAD_TRIGGER_L, PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B,
- PadButton::PAD_BUTTON_X, PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_START,
-};
-
-static void PadToState(const GCPadStatus& pad, GCState& out_state) {
- for (const auto& button : PadButtonArray) {
- const auto button_key = static_cast<u16>(button);
- const auto button_value = (pad.button & button_key) != 0;
- out_state.buttons.insert_or_assign(static_cast<s32>(button_key), button_value);
- }
-
- for (std::size_t i = 0; i < pad.axis_values.size(); ++i) {
- out_state.axes.insert_or_assign(static_cast<u32>(i), pad.axis_values[i]);
- }
-}
-
Adapter::Adapter() {
if (usb_adapter_handle != nullptr) {
return;
@@ -49,168 +29,263 @@ Adapter::Adapter() {
const int init_res = libusb_init(&libusb_ctx);
if (init_res == LIBUSB_SUCCESS) {
- Setup();
+ adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
} else {
LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res);
}
}
-GCPadStatus Adapter::GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload) {
- GCPadStatus pad = {};
- const std::size_t offset = 1 + (9 * port);
+Adapter::~Adapter() {
+ Reset();
+}
+
+void Adapter::AdapterInputThread() {
+ LOG_DEBUG(Input, "GC Adapter input thread started");
+ s32 payload_size{};
+ AdapterPayload adapter_payload{};
+
+ if (adapter_scan_thread.joinable()) {
+ adapter_scan_thread.join();
+ }
+
+ while (adapter_input_thread_running) {
+ libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
+ static_cast<s32>(adapter_payload.size()), &payload_size, 16);
+ if (IsPayloadCorrect(adapter_payload, payload_size)) {
+ UpdateControllers(adapter_payload);
+ UpdateVibrations();
+ }
+ std::this_thread::yield();
+ }
- adapter_controllers_status[port] = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+ if (restart_scan_thread) {
+ adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
+ restart_scan_thread = false;
+ }
+}
+
+bool Adapter::IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size) {
+ if (payload_size != static_cast<s32>(adapter_payload.size()) ||
+ adapter_payload[0] != LIBUSB_DT_HID) {
+ LOG_DEBUG(Input, "Error reading payload (size: {}, type: {:02x})", payload_size,
+ adapter_payload[0]);
+ if (input_error_counter++ > 20) {
+ LOG_ERROR(Input, "GC adapter timeout, Is the adapter connected?");
+ adapter_input_thread_running = false;
+ restart_scan_thread = true;
+ }
+ return false;
+ }
+
+ input_error_counter = 0;
+ return true;
+}
+
+void Adapter::UpdateControllers(const AdapterPayload& adapter_payload) {
+ for (std::size_t port = 0; port < pads.size(); ++port) {
+ const std::size_t offset = 1 + (9 * port);
+ const auto type = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+ UpdatePadType(port, type);
+ if (DeviceConnected(port)) {
+ const u8 b1 = adapter_payload[offset + 1];
+ const u8 b2 = adapter_payload[offset + 2];
+ UpdateStateButtons(port, b1, b2);
+ UpdateStateAxes(port, adapter_payload);
+ if (configuring) {
+ UpdateYuzuSettings(port);
+ }
+ }
+ }
+}
+
+void Adapter::UpdatePadType(std::size_t port, ControllerTypes pad_type) {
+ if (pads[port].type == pad_type) {
+ return;
+ }
+ // Device changed reset device and set new type
+ ResetDevice(port);
+ pads[port].type = pad_type;
+}
+
+void Adapter::UpdateStateButtons(std::size_t port, u8 b1, u8 b2) {
+ if (port >= pads.size()) {
+ return;
+ }
static constexpr std::array<PadButton, 8> b1_buttons{
- PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, PadButton::PAD_BUTTON_X,
- PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT,
- PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP,
+ PadButton::ButtonA, PadButton::ButtonB, PadButton::ButtonX, PadButton::ButtonY,
+ PadButton::ButtonLeft, PadButton::ButtonRight, PadButton::ButtonDown, PadButton::ButtonUp,
};
static constexpr std::array<PadButton, 4> b2_buttons{
- PadButton::PAD_BUTTON_START,
- PadButton::PAD_TRIGGER_Z,
- PadButton::PAD_TRIGGER_R,
- PadButton::PAD_TRIGGER_L,
+ PadButton::ButtonStart,
+ PadButton::TriggerZ,
+ PadButton::TriggerR,
+ PadButton::TriggerL,
};
+ pads[port].buttons = 0;
+ for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
+ if ((b1 & (1U << i)) != 0) {
+ pads[port].buttons =
+ static_cast<u16>(pads[port].buttons | static_cast<u16>(b1_buttons[i]));
+ pads[port].last_button = b1_buttons[i];
+ }
+ }
+ for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
+ if ((b2 & (1U << j)) != 0) {
+ pads[port].buttons =
+ static_cast<u16>(pads[port].buttons | static_cast<u16>(b2_buttons[j]));
+ pads[port].last_button = b2_buttons[j];
+ }
+ }
+}
+
+void Adapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload) {
+ if (port >= pads.size()) {
+ return;
+ }
+
+ const std::size_t offset = 1 + (9 * port);
static constexpr std::array<PadAxes, 6> axes{
PadAxes::StickX, PadAxes::StickY, PadAxes::SubstickX,
PadAxes::SubstickY, PadAxes::TriggerLeft, PadAxes::TriggerRight,
};
- if (adapter_controllers_status[port] == ControllerTypes::None && !get_origin[port]) {
- // Controller may have been disconnected, recalibrate if reconnected.
- get_origin[port] = true;
+ for (const PadAxes axis : axes) {
+ const auto index = static_cast<std::size_t>(axis);
+ const u8 axis_value = adapter_payload[offset + 3 + index];
+ if (pads[port].axis_origin[index] == 255) {
+ pads[port].axis_origin[index] = axis_value;
+ }
+ pads[port].axis_values[index] =
+ static_cast<s16>(axis_value - pads[port].axis_origin[index]);
}
+}
- if (adapter_controllers_status[port] != ControllerTypes::None) {
- const u8 b1 = adapter_payload[offset + 1];
- const u8 b2 = adapter_payload[offset + 2];
+void Adapter::UpdateYuzuSettings(std::size_t port) {
+ if (port >= pads.size()) {
+ return;
+ }
- for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
- if ((b1 & (1U << i)) != 0) {
- pad.button = static_cast<u16>(pad.button | static_cast<u16>(b1_buttons[i]));
- }
- }
+ constexpr u8 axis_threshold = 50;
+ GCPadStatus pad_status = {.port = port};
- for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
- if ((b2 & (1U << j)) != 0) {
- pad.button = static_cast<u16>(pad.button | static_cast<u16>(b2_buttons[j]));
- }
- }
- for (PadAxes axis : axes) {
- const auto index = static_cast<std::size_t>(axis);
- pad.axis_values[index] = adapter_payload[offset + 3 + index];
- }
+ if (pads[port].buttons != 0) {
+ pad_status.button = pads[port].last_button;
+ pad_queue.Push(pad_status);
+ }
+
+ // Accounting for a threshold here to ensure an intentional press
+ for (std::size_t i = 0; i < pads[port].axis_values.size(); ++i) {
+ const s16 value = pads[port].axis_values[i];
- if (get_origin[port]) {
- origin_status[port].axis_values = pad.axis_values;
- get_origin[port] = false;
+ if (value > axis_threshold || value < -axis_threshold) {
+ pad_status.axis = static_cast<PadAxes>(i);
+ pad_status.axis_value = value;
+ pad_status.axis_threshold = axis_threshold;
+ pad_queue.Push(pad_status);
}
}
- return pad;
}
-void Adapter::Read() {
- LOG_DEBUG(Input, "GC Adapter Read() thread started");
+void Adapter::UpdateVibrations() {
+ // Use 8 states to keep the switching between on/off fast enough for
+ // a human to not notice the difference between switching from on/off
+ // More states = more rumble strengths = slower update time
+ constexpr u8 vibration_states = 8;
- int payload_size;
- std::array<u8, 37> adapter_payload;
- std::array<GCPadStatus, 4> pads;
-
- while (adapter_thread_running) {
- libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
- sizeof(adapter_payload), &payload_size, 16);
-
- if (payload_size != sizeof(adapter_payload) || adapter_payload[0] != LIBUSB_DT_HID) {
- LOG_ERROR(Input,
- "Error reading payload (size: {}, type: {:02x}) Is the adapter connected?",
- payload_size, adapter_payload[0]);
- adapter_thread_running = false; // error reading from adapter, stop reading.
- break;
- }
- for (std::size_t port = 0; port < pads.size(); ++port) {
- pads[port] = GetPadStatus(port, adapter_payload);
- if (DeviceConnected(port) && configuring) {
- if (pads[port].button != 0) {
- pad_queue[port].Push(pads[port]);
- }
+ vibration_counter = (vibration_counter + 1) % vibration_states;
- // Accounting for a threshold here to ensure an intentional press
- for (size_t i = 0; i < pads[port].axis_values.size(); ++i) {
- const u8 value = pads[port].axis_values[i];
- const u8 origin = origin_status[port].axis_values[i];
-
- if (value > origin + pads[port].THRESHOLD ||
- value < origin - pads[port].THRESHOLD) {
- pads[port].axis = static_cast<PadAxes>(i);
- pads[port].axis_value = pads[port].axis_values[i];
- pad_queue[port].Push(pads[port]);
- }
- }
- }
- PadToState(pads[port], state[port]);
- }
- std::this_thread::yield();
+ for (GCController& pad : pads) {
+ const bool vibrate = pad.rumble_amplitude > vibration_counter;
+ vibration_changed |= vibrate != pad.enable_vibration;
+ pad.enable_vibration = vibrate;
}
+ SendVibrations();
}
-void Adapter::Setup() {
- // Initialize all controllers as unplugged
- adapter_controllers_status.fill(ControllerTypes::None);
- // Initialize all ports to store axis origin values
- get_origin.fill(true);
-
- // pointer to list of connected usb devices
- libusb_device** devices{};
-
- // populate the list of devices, get the count
- const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices);
- if (device_count < 0) {
- LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count);
+void Adapter::SendVibrations() {
+ if (!rumble_enabled || !vibration_changed) {
return;
}
-
- if (devices != nullptr) {
- for (std::size_t index = 0; index < static_cast<std::size_t>(device_count); ++index) {
- if (CheckDeviceAccess(devices[index])) {
- // GC Adapter found and accessible, registering it
- GetGCEndpoint(devices[index]);
- break;
- }
+ s32 size{};
+ constexpr u8 rumble_command = 0x11;
+ const u8 p1 = pads[0].enable_vibration;
+ const u8 p2 = pads[1].enable_vibration;
+ const u8 p3 = pads[2].enable_vibration;
+ const u8 p4 = pads[3].enable_vibration;
+ std::array<u8, 5> payload = {rumble_command, p1, p2, p3, p4};
+ const int err = libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, payload.data(),
+ static_cast<s32>(payload.size()), &size, 16);
+ if (err) {
+ LOG_DEBUG(Input, "Adapter libusb write failed: {}", libusb_error_name(err));
+ if (output_error_counter++ > 5) {
+ LOG_ERROR(Input, "GC adapter output timeout, Rumble disabled");
+ rumble_enabled = false;
}
- libusb_free_device_list(devices, 1);
+ return;
}
+ output_error_counter = 0;
+ vibration_changed = false;
}
-bool Adapter::CheckDeviceAccess(libusb_device* device) {
- libusb_device_descriptor desc;
- const int get_descriptor_error = libusb_get_device_descriptor(device, &desc);
- if (get_descriptor_error) {
- // could not acquire the descriptor, no point in trying to use it.
- LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}",
- get_descriptor_error);
- return false;
+bool Adapter::RumblePlay(std::size_t port, f32 amplitude) {
+ amplitude = std::clamp(amplitude, 0.0f, 1.0f);
+ const auto raw_amp = static_cast<u8>(amplitude * 0x8);
+ pads[port].rumble_amplitude = raw_amp;
+
+ return rumble_enabled;
+}
+
+void Adapter::AdapterScanThread() {
+ adapter_scan_thread_running = true;
+ adapter_input_thread_running = false;
+ if (adapter_input_thread.joinable()) {
+ adapter_input_thread.join();
+ }
+ ClearLibusbHandle();
+ ResetDevices();
+ while (adapter_scan_thread_running && !adapter_input_thread_running) {
+ Setup();
+ std::this_thread::sleep_for(std::chrono::seconds(1));
}
+}
- if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) {
- // This isn't the device we are looking for.
- return false;
+void Adapter::Setup() {
+ usb_adapter_handle = libusb_open_device_with_vid_pid(libusb_ctx, 0x057e, 0x0337);
+
+ if (usb_adapter_handle == NULL) {
+ return;
+ }
+ if (!CheckDeviceAccess()) {
+ ClearLibusbHandle();
+ return;
}
- const int open_error = libusb_open(device, &usb_adapter_handle);
- if (open_error == LIBUSB_ERROR_ACCESS) {
- LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.",
- desc.idVendor, desc.idProduct);
- return false;
+ libusb_device* device = libusb_get_device(usb_adapter_handle);
+
+ LOG_INFO(Input, "GC adapter is now connected");
+ // GC Adapter found and accessible, registering it
+ if (GetGCEndpoint(device)) {
+ adapter_scan_thread_running = false;
+ adapter_input_thread_running = true;
+ rumble_enabled = true;
+ input_error_counter = 0;
+ output_error_counter = 0;
+ adapter_input_thread = std::thread(&Adapter::AdapterInputThread, this);
}
- if (open_error) {
- LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error);
- return false;
+}
+
+bool Adapter::CheckDeviceAccess() {
+ // This fixes payload problems from offbrand GCAdapters
+ const s32 control_transfer_error =
+ libusb_control_transfer(usb_adapter_handle, 0x21, 11, 0x0001, 0, nullptr, 0, 1000);
+ if (control_transfer_error < 0) {
+ LOG_ERROR(Input, "libusb_control_transfer failed with error= {}", control_transfer_error);
}
- int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
+ s32 kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
if (kernel_driver_error == 1) {
kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0);
if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) {
@@ -236,13 +311,13 @@ bool Adapter::CheckDeviceAccess(libusb_device* device) {
return true;
}
-void Adapter::GetGCEndpoint(libusb_device* device) {
+bool Adapter::GetGCEndpoint(libusb_device* device) {
libusb_config_descriptor* config = nullptr;
const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config);
if (config_descriptor_return != LIBUSB_SUCCESS) {
LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}",
config_descriptor_return);
- return;
+ return false;
}
for (u8 ic = 0; ic < config->bNumInterfaces; ic++) {
@@ -264,31 +339,51 @@ void Adapter::GetGCEndpoint(libusb_device* device) {
unsigned char clear_payload = 0x13;
libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload,
sizeof(clear_payload), nullptr, 16);
-
- adapter_thread_running = true;
- adapter_input_thread = std::thread(&Adapter::Read, this);
+ return true;
}
-Adapter::~Adapter() {
- Reset();
-}
+void Adapter::JoinThreads() {
+ restart_scan_thread = false;
+ adapter_input_thread_running = false;
+ adapter_scan_thread_running = false;
-void Adapter::Reset() {
- if (adapter_thread_running) {
- adapter_thread_running = false;
+ if (adapter_scan_thread.joinable()) {
+ adapter_scan_thread.join();
}
+
if (adapter_input_thread.joinable()) {
adapter_input_thread.join();
}
+}
- adapter_controllers_status.fill(ControllerTypes::None);
- get_origin.fill(true);
-
+void Adapter::ClearLibusbHandle() {
if (usb_adapter_handle) {
libusb_release_interface(usb_adapter_handle, 1);
libusb_close(usb_adapter_handle);
usb_adapter_handle = nullptr;
}
+}
+
+void Adapter::ResetDevices() {
+ for (std::size_t i = 0; i < pads.size(); ++i) {
+ ResetDevice(i);
+ }
+}
+
+void Adapter::ResetDevice(std::size_t port) {
+ pads[port].type = ControllerTypes::None;
+ pads[port].enable_vibration = false;
+ pads[port].rumble_amplitude = 0;
+ pads[port].buttons = 0;
+ pads[port].last_button = PadButton::Undefined;
+ pads[port].axis_values.fill(0);
+ pads[port].axis_origin.fill(255);
+}
+
+void Adapter::Reset() {
+ JoinThreads();
+ ClearLibusbHandle();
+ ResetDevices();
if (libusb_ctx) {
libusb_exit(libusb_ctx);
@@ -297,11 +392,11 @@ void Adapter::Reset() {
std::vector<Common::ParamPackage> Adapter::GetInputDevices() const {
std::vector<Common::ParamPackage> devices;
- for (std::size_t port = 0; port < state.size(); ++port) {
+ for (std::size_t port = 0; port < pads.size(); ++port) {
if (!DeviceConnected(port)) {
continue;
}
- std::string name = fmt::format("Gamecube Controller {}", port);
+ std::string name = fmt::format("Gamecube Controller {}", port + 1);
devices.emplace_back(Common::ParamPackage{
{"class", "gcpad"},
{"display", std::move(name)},
@@ -318,18 +413,18 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
// This list also excludes any button that can't be really mapped
static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 12>
switch_to_gcadapter_button = {
- std::pair{Settings::NativeButton::A, PadButton::PAD_BUTTON_A},
- {Settings::NativeButton::B, PadButton::PAD_BUTTON_B},
- {Settings::NativeButton::X, PadButton::PAD_BUTTON_X},
- {Settings::NativeButton::Y, PadButton::PAD_BUTTON_Y},
- {Settings::NativeButton::Plus, PadButton::PAD_BUTTON_START},
- {Settings::NativeButton::DLeft, PadButton::PAD_BUTTON_LEFT},
- {Settings::NativeButton::DUp, PadButton::PAD_BUTTON_UP},
- {Settings::NativeButton::DRight, PadButton::PAD_BUTTON_RIGHT},
- {Settings::NativeButton::DDown, PadButton::PAD_BUTTON_DOWN},
- {Settings::NativeButton::SL, PadButton::PAD_TRIGGER_L},
- {Settings::NativeButton::SR, PadButton::PAD_TRIGGER_R},
- {Settings::NativeButton::R, PadButton::PAD_TRIGGER_Z},
+ std::pair{Settings::NativeButton::A, PadButton::ButtonA},
+ {Settings::NativeButton::B, PadButton::ButtonB},
+ {Settings::NativeButton::X, PadButton::ButtonX},
+ {Settings::NativeButton::Y, PadButton::ButtonY},
+ {Settings::NativeButton::Plus, PadButton::ButtonStart},
+ {Settings::NativeButton::DLeft, PadButton::ButtonLeft},
+ {Settings::NativeButton::DUp, PadButton::ButtonUp},
+ {Settings::NativeButton::DRight, PadButton::ButtonRight},
+ {Settings::NativeButton::DDown, PadButton::ButtonDown},
+ {Settings::NativeButton::SL, PadButton::TriggerL},
+ {Settings::NativeButton::SR, PadButton::TriggerR},
+ {Settings::NativeButton::R, PadButton::TriggerZ},
};
if (!params.Has("port")) {
return {};
@@ -352,8 +447,10 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
for (const auto& [switch_button, gcadapter_axis] : switch_to_gcadapter_axis) {
Common::ParamPackage button_params({{"engine", "gcpad"}});
button_params.Set("port", params.Get("port", 0));
- button_params.Set("button", static_cast<int>(PadButton::PAD_STICK));
- button_params.Set("axis", static_cast<int>(gcadapter_axis));
+ button_params.Set("button", static_cast<s32>(PadButton::Stick));
+ button_params.Set("axis", static_cast<s32>(gcadapter_axis));
+ button_params.Set("threshold", 0.5f);
+ button_params.Set("direction", "+");
mapping.insert_or_assign(switch_button, std::move(button_params));
}
return mapping;
@@ -382,46 +479,33 @@ InputCommon::AnalogMapping Adapter::GetAnalogMappingForDevice(
}
bool Adapter::DeviceConnected(std::size_t port) const {
- return adapter_controllers_status[port] != ControllerTypes::None;
-}
-
-void Adapter::ResetDeviceType(std::size_t port) {
- adapter_controllers_status[port] = ControllerTypes::None;
+ return pads[port].type != ControllerTypes::None;
}
void Adapter::BeginConfiguration() {
- get_origin.fill(true);
- for (auto& pq : pad_queue) {
- pq.Clear();
- }
+ pad_queue.Clear();
configuring = true;
}
void Adapter::EndConfiguration() {
- for (auto& pq : pad_queue) {
- pq.Clear();
- }
+ pad_queue.Clear();
configuring = false;
}
-std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() {
+Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() {
return pad_queue;
}
-const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const {
+const Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() const {
return pad_queue;
}
-std::array<GCState, 4>& Adapter::GetPadState() {
- return state;
-}
-
-const std::array<GCState, 4>& Adapter::GetPadState() const {
- return state;
+GCController& Adapter::GetPadState(std::size_t port) {
+ return pads.at(port);
}
-int Adapter::GetOriginValue(u32 port, u32 axis) const {
- return origin_status[port].axis_values[axis];
+const GCController& Adapter::GetPadState(std::size_t port) const {
+ return pads.at(port);
}
} // namespace GCAdapter
diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h
index 4f5f3de8e..d28dcfad3 100644
--- a/src/input_common/gcadapter/gc_adapter.h
+++ b/src/input_common/gcadapter/gc_adapter.h
@@ -19,24 +19,23 @@ struct libusb_device_handle;
namespace GCAdapter {
enum class PadButton {
- PAD_BUTTON_LEFT = 0x0001,
- PAD_BUTTON_RIGHT = 0x0002,
- PAD_BUTTON_DOWN = 0x0004,
- PAD_BUTTON_UP = 0x0008,
- PAD_TRIGGER_Z = 0x0010,
- PAD_TRIGGER_R = 0x0020,
- PAD_TRIGGER_L = 0x0040,
- PAD_BUTTON_A = 0x0100,
- PAD_BUTTON_B = 0x0200,
- PAD_BUTTON_X = 0x0400,
- PAD_BUTTON_Y = 0x0800,
- PAD_BUTTON_START = 0x1000,
+ Undefined = 0x0000,
+ ButtonLeft = 0x0001,
+ ButtonRight = 0x0002,
+ ButtonDown = 0x0004,
+ ButtonUp = 0x0008,
+ TriggerZ = 0x0010,
+ TriggerR = 0x0020,
+ TriggerL = 0x0040,
+ ButtonA = 0x0100,
+ ButtonB = 0x0200,
+ ButtonX = 0x0400,
+ ButtonY = 0x0800,
+ ButtonStart = 0x1000,
// Below is for compatibility with "AxisButton" type
- PAD_STICK = 0x2000,
+ Stick = 0x2000,
};
-extern const std::array<PadButton, 12> PadButtonArray;
-
enum class PadAxes : u8 {
StickX,
StickY,
@@ -47,87 +46,122 @@ enum class PadAxes : u8 {
Undefined,
};
+enum class ControllerTypes {
+ None,
+ Wired,
+ Wireless,
+};
+
struct GCPadStatus {
- u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits
+ std::size_t port{};
- std::array<u8, 6> axis_values{}; // Triggers and sticks, following indices defined in PadAxes
- static constexpr u8 THRESHOLD = 50; // Threshold for axis press for polling
+ PadButton button{PadButton::Undefined}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits
- u8 port{};
PadAxes axis{PadAxes::Undefined};
- u8 axis_value{255};
+ s16 axis_value{};
+ u8 axis_threshold{50};
};
-struct GCState {
- std::unordered_map<int, bool> buttons;
- std::unordered_map<u32, u16> axes;
+struct GCController {
+ ControllerTypes type{};
+ bool enable_vibration{};
+ u8 rumble_amplitude{};
+ u16 buttons{};
+ PadButton last_button{};
+ std::array<s16, 6> axis_values{};
+ std::array<u8, 6> axis_origin{};
};
-enum class ControllerTypes { None, Wired, Wireless };
-
class Adapter {
public:
- /// Initialize the GC Adapter capture and read sequence
Adapter();
-
- /// Close the adapter read thread and release the adapter
~Adapter();
+
+ /// Request a vibration for a controlelr
+ bool RumblePlay(std::size_t port, f32 amplitude);
+
/// Used for polling
void BeginConfiguration();
void EndConfiguration();
+ Common::SPSCQueue<GCPadStatus>& GetPadQueue();
+ const Common::SPSCQueue<GCPadStatus>& GetPadQueue() const;
+
+ GCController& GetPadState(std::size_t port);
+ const GCController& GetPadState(std::size_t port) const;
+
+ /// Returns true if there is a device connected to port
+ bool DeviceConnected(std::size_t port) const;
+
+ /// Used for automapping features
std::vector<Common::ParamPackage> GetInputDevices() const;
InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const;
InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const;
- /// Returns true if there is a device connected to port
- bool DeviceConnected(std::size_t port) const;
+private:
+ using AdapterPayload = std::array<u8, 37>;
- std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue();
- const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const;
+ void UpdatePadType(std::size_t port, ControllerTypes pad_type);
+ void UpdateControllers(const AdapterPayload& adapter_payload);
+ void UpdateYuzuSettings(std::size_t port);
+ void UpdateStateButtons(std::size_t port, u8 b1, u8 b2);
+ void UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload);
+ void UpdateVibrations();
- std::array<GCState, 4>& GetPadState();
- const std::array<GCState, 4>& GetPadState() const;
+ void AdapterInputThread();
- int GetOriginValue(u32 port, u32 axis) const;
+ void AdapterScanThread();
-private:
- GCPadStatus GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload);
+ bool IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size);
+
+ // Updates vibration state of all controllers
+ void SendVibrations();
+
+ /// For use in initialization, querying devices to find the adapter
+ void Setup();
- void Read();
+ /// Resets status of all GC controller devices to a disconected state
+ void ResetDevices();
- /// Resets status of device connected to port
- void ResetDeviceType(std::size_t port);
+ /// Resets status of device connected to a disconected state
+ void ResetDevice(std::size_t port);
/// Returns true if we successfully gain access to GC Adapter
- bool CheckDeviceAccess(libusb_device* device);
+ bool CheckDeviceAccess();
- /// Captures GC Adapter endpoint address,
- void GetGCEndpoint(libusb_device* device);
+ /// Captures GC Adapter endpoint address
+ /// Returns true if the endpoind was set correctly
+ bool GetGCEndpoint(libusb_device* device);
/// For shutting down, clear all data, join all threads, release usb
void Reset();
- /// For use in initialization, querying devices to find the adapter
- void Setup();
+ // Join all threads
+ void JoinThreads();
+
+ // Release usb handles
+ void ClearLibusbHandle();
libusb_device_handle* usb_adapter_handle = nullptr;
+ std::array<GCController, 4> pads;
+ Common::SPSCQueue<GCPadStatus> pad_queue;
std::thread adapter_input_thread;
- bool adapter_thread_running;
+ std::thread adapter_scan_thread;
+ bool adapter_input_thread_running;
+ bool adapter_scan_thread_running;
+ bool restart_scan_thread;
libusb_context* libusb_ctx;
- u8 input_endpoint = 0;
- u8 output_endpoint = 0;
-
- bool configuring = false;
+ u8 input_endpoint{0};
+ u8 output_endpoint{0};
+ u8 input_error_counter{0};
+ u8 output_error_counter{0};
+ int vibration_counter{0};
- std::array<GCState, 4> state;
- std::array<bool, 4> get_origin;
- std::array<GCPadStatus, 4> origin_status;
- std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue;
- std::array<ControllerTypes, 4> adapter_controllers_status{};
+ bool configuring{false};
+ bool rumble_enabled{true};
+ bool vibration_changed{true};
};
-
} // namespace GCAdapter
diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp
index 893556916..6bd6f57fc 100644
--- a/src/input_common/gcadapter/gc_poller.cpp
+++ b/src/input_common/gcadapter/gc_poller.cpp
@@ -15,22 +15,30 @@ namespace InputCommon {
class GCButton final : public Input::ButtonDevice {
public:
- explicit GCButton(u32 port_, int button_, const GCAdapter::Adapter* adapter)
+ explicit GCButton(u32 port_, s32 button_, GCAdapter::Adapter* adapter)
: port(port_), button(button_), gcadapter(adapter) {}
~GCButton() override;
bool GetStatus() const override {
if (gcadapter->DeviceConnected(port)) {
- return gcadapter->GetPadState()[port].buttons.at(button);
+ return (gcadapter->GetPadState(port).buttons & button) != 0;
}
return false;
}
+ bool SetRumblePlay(f32 amp_high, f32 amp_low, f32 freq_high, f32 freq_low) const override {
+ const float amplitude = amp_high + amp_low > 2.0f ? 1.0f : (amp_high + amp_low) * 0.5f;
+ const auto new_amp =
+ static_cast<f32>(pow(amplitude, 0.5f) * (3.0f - 2.0f * pow(amplitude, 0.15f)));
+
+ return gcadapter->RumblePlay(port, new_amp);
+ }
+
private:
const u32 port;
- const int button;
- const GCAdapter::Adapter* gcadapter;
+ const s32 button;
+ GCAdapter::Adapter* gcadapter;
};
class GCAxisButton final : public Input::ButtonDevice {
@@ -38,13 +46,12 @@ public:
explicit GCAxisButton(u32 port_, u32 axis_, float threshold_, bool trigger_if_greater_,
const GCAdapter::Adapter* adapter)
: port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_),
- gcadapter(adapter),
- origin_value(static_cast<float>(adapter->GetOriginValue(port_, axis_))) {}
+ gcadapter(adapter) {}
bool GetStatus() const override {
if (gcadapter->DeviceConnected(port)) {
- const float current_axis_value = gcadapter->GetPadState()[port].axes.at(axis);
- const float axis_value = (current_axis_value - origin_value) / 128.0f;
+ const float current_axis_value = gcadapter->GetPadState(port).axis_values.at(axis);
+ const float axis_value = current_axis_value / 128.0f;
if (trigger_if_greater) {
// TODO: Might be worthwile to set a slider for the trigger threshold. It is
// currently always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick
@@ -61,7 +68,6 @@ private:
float threshold;
bool trigger_if_greater;
const GCAdapter::Adapter* gcadapter;
- const float origin_value;
};
GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_)
@@ -73,7 +79,7 @@ std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::Param
const auto button_id = params.Get("button", 0);
const auto port = static_cast<u32>(params.Get("port", 0));
- constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK);
+ constexpr s32 PAD_STICK_ID = static_cast<s32>(GCAdapter::PadButton::Stick);
// button is not an axis/stick button
if (button_id != PAD_STICK_ID) {
@@ -106,32 +112,25 @@ Common::ParamPackage GCButtonFactory::GetNextInput() const {
Common::ParamPackage params;
GCAdapter::GCPadStatus pad;
auto& queue = adapter->GetPadQueue();
- for (std::size_t port = 0; port < queue.size(); ++port) {
- while (queue[port].Pop(pad)) {
- // This while loop will break on the earliest detected button
- params.Set("engine", "gcpad");
- params.Set("port", static_cast<int>(port));
- for (const auto& button : GCAdapter::PadButtonArray) {
- const u16 button_value = static_cast<u16>(button);
- if (pad.button & button_value) {
- params.Set("button", button_value);
- break;
- }
- }
+ while (queue.Pop(pad)) {
+ // This while loop will break on the earliest detected button
+ params.Set("engine", "gcpad");
+ params.Set("port", static_cast<s32>(pad.port));
+ if (pad.button != GCAdapter::PadButton::Undefined) {
+ params.Set("button", static_cast<u16>(pad.button));
+ }
- // For Axis button implementation
- if (pad.axis != GCAdapter::PadAxes::Undefined) {
- params.Set("axis", static_cast<u8>(pad.axis));
- params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK));
- if (pad.axis_value > 128) {
- params.Set("direction", "+");
- params.Set("threshold", "0.25");
- } else {
- params.Set("direction", "-");
- params.Set("threshold", "-0.25");
- }
- break;
+ // For Axis button implementation
+ if (pad.axis != GCAdapter::PadAxes::Undefined) {
+ params.Set("axis", static_cast<u8>(pad.axis));
+ params.Set("button", static_cast<u16>(GCAdapter::PadButton::Stick));
+ params.Set("threshold", "0.25");
+ if (pad.axis_value > 0) {
+ params.Set("direction", "+");
+ } else {
+ params.Set("direction", "-");
}
+ break;
}
}
return params;
@@ -152,17 +151,14 @@ public:
explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_,
const GCAdapter::Adapter* adapter, float range_)
: port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter),
- origin_value_x(static_cast<float>(adapter->GetOriginValue(port_, axis_x_))),
- origin_value_y(static_cast<float>(adapter->GetOriginValue(port_, axis_y_))),
range(range_) {}
float GetAxis(u32 axis) const {
if (gcadapter->DeviceConnected(port)) {
std::lock_guard lock{mutex};
- const auto origin_value = axis % 2 == 0 ? origin_value_x : origin_value_y;
const auto axis_value =
- static_cast<float>(gcadapter->GetPadState()[port].axes.at(axis));
- return (axis_value - origin_value) / (100.0f * range);
+ static_cast<float>(gcadapter->GetPadState(port).axis_values.at(axis));
+ return (axis_value) / (100.0f * range);
}
return 0.0f;
}
@@ -215,8 +211,6 @@ private:
const u32 axis_y;
const float deadzone;
const GCAdapter::Adapter* gcadapter;
- const float origin_value_x;
- const float origin_value_y;
const float range;
mutable std::mutex mutex;
};
@@ -254,26 +248,44 @@ void GCAnalogFactory::EndConfiguration() {
Common::ParamPackage GCAnalogFactory::GetNextInput() {
GCAdapter::GCPadStatus pad;
+ Common::ParamPackage params;
auto& queue = adapter->GetPadQueue();
- for (std::size_t port = 0; port < queue.size(); ++port) {
- while (queue[port].Pop(pad)) {
- if (pad.axis == GCAdapter::PadAxes::Undefined ||
- std::abs((static_cast<float>(pad.axis_value) - 128.0f) / 128.0f) < 0.1f) {
- continue;
- }
- // An analog device needs two axes, so we need to store the axis for later and wait for
- // a second input event. The axes also must be from the same joystick.
- const u8 axis = static_cast<u8>(pad.axis);
- if (analog_x_axis == -1) {
- analog_x_axis = axis;
- controller_number = static_cast<int>(port);
- } else if (analog_y_axis == -1 && analog_x_axis != axis &&
- controller_number == static_cast<int>(port)) {
- analog_y_axis = axis;
- }
+ while (queue.Pop(pad)) {
+ if (pad.button != GCAdapter::PadButton::Undefined) {
+ params.Set("engine", "gcpad");
+ params.Set("port", static_cast<s32>(pad.port));
+ params.Set("button", static_cast<u16>(pad.button));
+ return params;
+ }
+ if (pad.axis == GCAdapter::PadAxes::Undefined ||
+ std::abs(static_cast<float>(pad.axis_value) / 128.0f) < 0.1f) {
+ continue;
+ }
+ // An analog device needs two axes, so we need to store the axis for later and wait for
+ // a second input event. The axes also must be from the same joystick.
+ const u8 axis = static_cast<u8>(pad.axis);
+ if (axis == 0 || axis == 1) {
+ analog_x_axis = 0;
+ analog_y_axis = 1;
+ controller_number = static_cast<s32>(pad.port);
+ break;
+ }
+ if (axis == 2 || axis == 3) {
+ analog_x_axis = 2;
+ analog_y_axis = 3;
+ controller_number = static_cast<s32>(pad.port);
+ break;
+ }
+
+ if (analog_x_axis == -1) {
+ analog_x_axis = axis;
+ controller_number = static_cast<s32>(pad.port);
+ } else if (analog_y_axis == -1 && analog_x_axis != axis &&
+ controller_number == static_cast<s32>(pad.port)) {
+ analog_y_axis = axis;
+ break;
}
}
- Common::ParamPackage params;
if (analog_x_axis != -1 && analog_y_axis != -1) {
params.Set("engine", "gcpad");
params.Set("port", controller_number);
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 2df410be8..1adf3cd13 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <fstream>
+#include <vector>
#include "common/assert.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 2e56daf29..5bbe6a332 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -5,8 +5,6 @@
#pragma once
#include <memory>
-#include <vector>
-#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -44,11 +42,11 @@ public:
void Decode();
/// Returns most recently decoded frame
- AVFrame* GetCurrentFrame();
- const AVFrame* GetCurrentFrame() const;
+ [[nodiscard]] AVFrame* GetCurrentFrame();
+ [[nodiscard]] const AVFrame* GetCurrentFrame() const;
/// Returns the value of current_codec
- NvdecCommon::VideoCodec GetCurrentCodec() const;
+ [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
private:
bool initialized{};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 1a39f7b23..33e063e20 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -18,17 +18,33 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
+#include <array>
#include "common/bit_util.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
+namespace {
+// ZigZag LUTs from libavcodec.
+constexpr std::array<u8, 64> zig_zag_direct{
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
+ 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+ 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+constexpr std::array<u8, 16> zig_zag_scan{
+ 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+ 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+} // Anonymous namespace
+
H264::H264(GPU& gpu_) : gpu(gpu_) {}
H264::~H264() = default;
-std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
+const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+ bool is_first_frame) {
H264DecoderContext context{};
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
@@ -48,7 +64,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
writer.WriteU(0, 8);
writer.WriteU(31, 8);
writer.WriteUe(0);
- const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
+ const auto chroma_format_idc =
+ static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
writer.WriteUe(chroma_format_idc);
if (chroma_format_idc == 3) {
writer.WriteBit(false);
@@ -59,8 +76,8 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
writer.WriteBit(false); // Scaling matrix present flag
- const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
- writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
+ const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
+ writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
writer.WriteUe(order_cnt_type);
if (order_cnt_type == 0) {
writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
@@ -100,7 +117,7 @@ std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bo
writer.WriteUe(0);
writer.WriteUe(0);
- writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag);
+ writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
writer.WriteBit(false);
writer.WriteUe(0);
writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
@@ -172,8 +189,8 @@ void H264BitWriter::WriteSe(s32 value) {
WriteExpGolombCodedInt(value);
}
-void H264BitWriter::WriteUe(s32 value) {
- WriteExpGolombCodedUInt((u32)value);
+void H264BitWriter::WriteUe(u32 value) {
+ WriteExpGolombCodedUInt(value);
}
void H264BitWriter::End() {
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 21752dd90..273449495 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -38,7 +38,7 @@ public:
/// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
void WriteU(s32 value, s32 value_sz);
void WriteSe(s32 value);
- void WriteUe(s32 value);
+ void WriteUe(u32 value);
/// Finalize the bitstream
void End();
@@ -51,26 +51,14 @@ public:
void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
/// Return the bitstream as a vector.
- std::vector<u8>& GetByteArray();
- const std::vector<u8>& GetByteArray() const;
+ [[nodiscard]] std::vector<u8>& GetByteArray();
+ [[nodiscard]] const std::vector<u8>& GetByteArray() const;
private:
- // ZigZag LUTs from libavcodec.
- static constexpr std::array<u8, 64> zig_zag_direct{
- 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
- 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
- 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
- };
-
- static constexpr std::array<u8, 16> zig_zag_scan{
- 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
- 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
- };
-
void WriteBits(s32 value, s32 bit_count);
void WriteExpGolombCodedInt(s32 value);
void WriteExpGolombCodedUInt(u32 value);
- s32 GetFreeBufferBits();
+ [[nodiscard]] s32 GetFreeBufferBits();
void Flush();
s32 buffer_size{8};
@@ -86,8 +74,8 @@ public:
~H264();
/// Compose the H264 header of the frame for FFmpeg decoding
- std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
- bool is_first_frame = false);
+ [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+ bool is_first_frame = false);
private:
struct H264ParameterSet {
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index d205a8f5d..ab44fdc9e 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -9,7 +9,7 @@
#include "video_core/memory_manager.h"
namespace Tegra::Decoder {
-
+namespace {
// Default compressed header probabilities once frame context resets
constexpr Vp9EntropyProbs default_probs{
.y_mode_prob{
@@ -170,6 +170,89 @@ constexpr Vp9EntropyProbs default_probs{
.high_precision{128, 128},
};
+constexpr std::array<s32, 256> norm_lut{
+ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+constexpr std::array<s32, 254> map_lut{
+ 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+ 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+ 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
+ 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
+ 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
+ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 19,
+};
+
+// 6.2.14 Tile size calculation
+
+[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
+ const s32 sb64_cols = (frame_width + 63) / 64;
+ s32 min_log2 = 0;
+
+ while ((64 << min_log2) < sb64_cols) {
+ min_log2++;
+ }
+
+ return min_log2;
+}
+
+[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
+ const s32 sb64_cols = (frame_width + 63) / 64;
+ s32 max_log2 = 1;
+
+ while ((sb64_cols >> max_log2) >= 4) {
+ max_log2++;
+ }
+
+ return max_log2 - 1;
+}
+
+// Recenters probability. Based on section 6.3.6 of VP9 Specification
+[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
+ if (new_prob > old_prob * 2) {
+ return new_prob;
+ }
+
+ if (new_prob >= old_prob) {
+ return (new_prob - old_prob) * 2;
+ }
+
+ return (old_prob - new_prob) * 2 - 1;
+}
+
+// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
+[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
+ new_prob--;
+ old_prob--;
+
+ std::size_t index{};
+
+ if (old_prob * 2 <= 0xff) {
+ index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
+ } else {
+ index = static_cast<std::size_t>(
+ std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
+ }
+
+ return map_lut[index];
+}
+} // Anonymous namespace
+
VP9::VP9(GPU& gpu) : gpu(gpu) {}
VP9::~VP9() = default;
@@ -207,32 +290,6 @@ void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_pro
EncodeTermSubExp(writer, delta);
}
-s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) {
- new_prob--;
- old_prob--;
-
- std::size_t index{};
-
- if (old_prob * 2 <= 0xff) {
- index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
- } else {
- index = static_cast<std::size_t>(
- std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
- }
-
- return map_lut[index];
-}
-
-s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) {
- if (new_prob > old_prob * 2) {
- return new_prob;
- } else if (new_prob >= old_prob) {
- return (new_prob - old_prob) * 2;
- } else {
- return (old_prob - new_prob) * 2 - 1;
- }
-}
-
void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
if (WriteLessThan(writer, value, 16)) {
writer.Write(value, 4);
@@ -332,28 +389,6 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
}
}
-s32 VP9::CalcMinLog2TileCols(s32 frame_width) {
- const s32 sb64_cols = (frame_width + 63) / 64;
- s32 min_log2 = 0;
-
- while ((64 << min_log2) < sb64_cols) {
- min_log2++;
- }
-
- return min_log2;
-}
-
-s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) {
- const s32 sb64_cols = (frameWidth + 63) / 64;
- s32 max_log2 = 1;
-
- while ((sb64_cols >> max_log2) >= 4) {
- max_log2++;
- }
-
- return max_log2 - 1;
-}
-
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
PictureInfo picture_info{};
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
@@ -379,14 +414,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
Vp9FrameContainer frame{};
{
gpu.SyncGuestHost();
- frame.info = std::move(GetVp9PictureInfo(state));
+ frame.info = GetVp9PictureInfo(state);
frame.bit_stream.resize(frame.info.bitstream_size);
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(),
frame.info.bitstream_size);
}
// Buffer two frames, saving the last show frame info
- if (next_next_frame.bit_stream.size() != 0) {
+ if (!next_next_frame.bit_stream.empty()) {
Vp9FrameContainer temp{
.info = frame.info,
.bit_stream = frame.bit_stream,
@@ -396,15 +431,15 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
frame.bit_stream = next_next_frame.bit_stream;
next_next_frame = std::move(temp);
- if (next_frame.bit_stream.size() != 0) {
- Vp9FrameContainer temp{
+ if (!next_frame.bit_stream.empty()) {
+ Vp9FrameContainer temp2{
.info = frame.info,
.bit_stream = frame.bit_stream,
};
next_frame.info.show_frame = frame.info.last_frame_shown;
frame.info = next_frame.info;
frame.bit_stream = next_frame.bit_stream;
- next_frame = std::move(temp);
+ next_frame = std::move(temp2);
} else {
next_frame.info = frame.info;
next_frame.bit_stream = frame.bit_stream;
@@ -605,12 +640,6 @@ std::vector<u8> VP9::ComposeCompressedHeader() {
writer.End();
return writer.GetBuffer();
-
- const auto writer_bytearray = writer.GetBuffer();
-
- std::vector<u8> compressed_header(writer_bytearray.size());
- std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size());
- return compressed_header;
}
VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
@@ -648,7 +677,6 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
current_frame_info.intra_only = true;
} else {
- std::array<s32, 3> ref_frame_index;
if (!current_frame_info.show_frame) {
uncomp_writer.WriteBit(current_frame_info.intra_only);
@@ -663,9 +691,9 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
}
// Last, Golden, Altref frames
- ref_frame_index = std::array<s32, 3>{0, 1, 2};
+ std::array<s32, 3> ref_frame_index{0, 1, 2};
- // set when next frame is hidden
+ // Set when next frame is hidden
// altref and golden references are swapped
if (swap_next_golden) {
ref_frame_index = std::array<s32, 3>{0, 2, 1};
@@ -754,17 +782,19 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
const s8 old_deltas = loop_filter_ref_deltas[index];
const s8 new_deltas = current_frame_info.ref_deltas[index];
+ const bool differing_delta = old_deltas != new_deltas;
- loop_filter_delta_update |=
- (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas);
+ update_loop_filter_ref_deltas[index] = differing_delta;
+ loop_filter_delta_update |= differing_delta;
}
for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
const s8 old_deltas = loop_filter_mode_deltas[index];
const s8 new_deltas = current_frame_info.mode_deltas[index];
+ const bool differing_delta = old_deltas != new_deltas;
- loop_filter_delta_update |=
- (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas);
+ update_loop_filter_mode_deltas[index] = differing_delta;
+ loop_filter_delta_update |= differing_delta;
}
uncomp_writer.WriteBit(loop_filter_delta_update);
@@ -824,12 +854,12 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
return uncomp_writer;
}
-std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
std::vector<u8> bitstream;
{
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
current_frame_info = curr_frame.info;
- bitstream = curr_frame.bit_stream;
+ bitstream = std::move(curr_frame.bit_stream);
}
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 748e11bae..e2504512c 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -4,9 +4,9 @@
#pragma once
-#include <unordered_map>
+#include <array>
#include <vector>
-#include "common/common_funcs.h"
+
#include "common/common_types.h"
#include "common/stream.h"
#include "video_core/command_classes/codecs/vp9_types.h"
@@ -25,6 +25,12 @@ public:
VpxRangeEncoder();
~VpxRangeEncoder();
+ VpxRangeEncoder(const VpxRangeEncoder&) = delete;
+ VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
+
+ VpxRangeEncoder(VpxRangeEncoder&&) = default;
+ VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
+
/// Writes the rightmost value_size bits from value into the stream
void Write(s32 value, s32 value_size);
@@ -37,11 +43,11 @@ public:
/// Signal the end of the bitstream
void End();
- std::vector<u8>& GetBuffer() {
+ [[nodiscard]] std::vector<u8>& GetBuffer() {
return base_stream.GetBuffer();
}
- const std::vector<u8>& GetBuffer() const {
+ [[nodiscard]] const std::vector<u8>& GetBuffer() const {
return base_stream.GetBuffer();
}
@@ -52,17 +58,6 @@ private:
u32 range{0xff};
s32 count{-24};
s32 half_probability{128};
- static constexpr std::array<s32, 256> norm_lut{
- 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
};
class VpxBitStreamWriter {
@@ -70,6 +65,12 @@ public:
VpxBitStreamWriter();
~VpxBitStreamWriter();
+ VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
+ VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
+
+ VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
+ VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
+
/// Write an unsigned integer value
void WriteU(u32 value, u32 value_size);
@@ -86,10 +87,10 @@ public:
void Flush();
/// Returns byte_array
- std::vector<u8>& GetByteArray();
+ [[nodiscard]] std::vector<u8>& GetByteArray();
/// Returns const byte_array
- const std::vector<u8>& GetByteArray() const;
+ [[nodiscard]] const std::vector<u8>& GetByteArray() const;
private:
/// Write bit_count bits from value into buffer
@@ -110,12 +111,18 @@ public:
explicit VP9(GPU& gpu);
~VP9();
+ VP9(const VP9&) = delete;
+ VP9& operator=(const VP9&) = delete;
+
+ VP9(VP9&&) = default;
+ VP9& operator=(VP9&&) = delete;
+
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
/// documentation
- std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
/// Returns true if the most recent frame was a hidden frame.
- bool WasFrameHidden() const {
+ [[nodiscard]] bool WasFrameHidden() const {
return hidden;
}
@@ -132,12 +139,6 @@ private:
/// Generates compressed header probability deltas in the bitstream writer
void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
- /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
- s32 RemapProbability(s32 new_prob, s32 old_prob);
-
- /// Recenters probability. Based on section 6.3.6 of VP9 Specification
- s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
-
/// Inverse of 6.3.4 Decode term subexp
void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
@@ -157,22 +158,18 @@ private:
/// Write motion vector probability updates. 6.3.17 in the spec
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
- /// 6.2.14 Tile size calculation
- s32 CalcMinLog2TileCols(s32 frame_width);
- s32 CalcMaxLog2TileCols(s32 frame_width);
-
/// Returns VP9 information from NVDEC provided offset and size
- Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
/// Returns frame to be decoded after buffering
- Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+ [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
/// Use NVDEC providied information to compose the headers for the current frame
- std::vector<u8> ComposeCompressedHeader();
- VpxBitStreamWriter ComposeUncompressedHeader();
+ [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
+ [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
GPU& gpu;
std::vector<u8> frame;
@@ -180,7 +177,7 @@ private:
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
- bool hidden;
+ bool hidden = false;
s64 current_frame_number = -2; // since we buffer 2 frames
s32 grace_period = 6; // frame offsets need to stabilize
std::array<FrameContexts, 4> frame_ctxs{};
@@ -193,23 +190,6 @@ private:
s32 diff_update_probability = 252;
s32 frame_sync_code = 0x498342;
- static constexpr std::array<s32, 254> map_lut = {
- 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
- 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50,
- 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66,
- 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82,
- 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6,
- 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113,
- 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129,
- 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
- 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160,
- 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176,
- 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
- 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207,
- 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223,
- 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
- };
};
} // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 8688fdac0..4f0b05d22 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -4,13 +4,11 @@
#pragma once
-#include <algorithm>
-#include <list>
+#include <array>
+#include <cstring>
#include <vector>
-#include "common/cityhash.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
namespace Tegra {
class GPU;
@@ -233,9 +231,8 @@ struct PictureInfo {
u32 surface_params{};
INSERT_PADDING_WORDS(3);
- Vp9PictureInfo Convert() const {
-
- return Vp9PictureInfo{
+ [[nodiscard]] Vp9PictureInfo Convert() const {
+ return {
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index a5234ee47..c4dd4881a 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -15,7 +15,7 @@ void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
std::memcpy(state_offset, &arguments, sizeof(u32));
}
-void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) {
+void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
StateWrite(static_cast<u32>(method), arguments[0]);
switch (method) {
case Method::WaitSyncpt:
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 501a5ed2e..013eaa0c1 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -61,7 +61,7 @@ public:
~Host1x();
/// Writes the method into the state, Invoke Execute() if encountered
- void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);
+ void ProcessMethod(Method method, const std::vector<u32>& arguments);
private:
/// For Host1x, execute is waiting on a syncpoint previously written into the state
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index ede9466eb..8ca7a7b06 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -2,13 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <bitset>
#include "common/assert.h"
-#include "common/bit_util.h"
-#include "core/memory.h"
#include "video_core/command_classes/nvdec.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace Tegra {
@@ -16,7 +12,7 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
Nvdec::~Nvdec() = default;
-void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) {
+void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
if (method == Method::SetVideoCodec) {
codec->StateWrite(static_cast<u32>(method), arguments[0]);
} else {
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index c1a9d843e..eec4443f9 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -4,8 +4,8 @@
#pragma once
+#include <memory>
#include <vector>
-#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/codecs/codec.h"
@@ -23,17 +23,17 @@ public:
~Nvdec();
/// Writes the method into the state, Invoke Execute() if encountered
- void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);
+ void ProcessMethod(Method method, const std::vector<u32>& arguments);
/// Return most recently decoded frame
- AVFrame* GetFrame();
- const AVFrame* GetFrame() const;
+ [[nodiscard]] AVFrame* GetFrame();
+ [[nodiscard]] const AVFrame* GetFrame() const;
private:
/// Invoke codec to decode a frame
void Execute();
GPU& gpu;
- std::unique_ptr<Tegra::Codec> codec;
+ std::unique_ptr<Codec> codec;
};
} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
index a0ab44855..19dc9e0ab 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -27,22 +27,22 @@ SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
SyncptIncrManager::~SyncptIncrManager() = default;
void SyncptIncrManager::Increment(u32 id) {
- increments.push_back(SyncptIncr{0, id, true});
+ increments.emplace_back(0, 0, id, true);
IncrementAllDone();
}
u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
const u32 handle = current_id++;
- increments.push_back(SyncptIncr{handle, class_id, id});
+ increments.emplace_back(handle, class_id, id);
return handle;
}
void SyncptIncrManager::SignalDone(u32 handle) {
- auto done_incr = std::find_if(increments.begin(), increments.end(),
- [handle](SyncptIncr incr) { return incr.id == handle; });
- if (done_incr != increments.end()) {
- const SyncptIncr incr = *done_incr;
- *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true};
+ const auto done_incr =
+ std::find_if(increments.begin(), increments.end(),
+ [handle](const SyncptIncr& incr) { return incr.id == handle; });
+ if (done_incr != increments.cend()) {
+ done_incr->complete = true;
}
IncrementAllDone();
}
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
index 353b67573..2c321ec58 100644
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/command_classes/sync_manager.h
@@ -32,8 +32,8 @@ struct SyncptIncr {
u32 syncpt_id;
bool complete;
- SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
- : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+ SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+ : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
};
class SyncptIncrManager {
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e15a1a8..5b52da277 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -26,7 +26,7 @@ void Vic::VicStateWrite(u32 offset, u32 arguments) {
std::memcpy(state_offset, &arguments, sizeof(u32));
}
-void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) {
+void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
VicStateWrite(static_cast<u32>(method), arguments[0]);
const u64 arg = static_cast<u64>(arguments[0]) << 8;
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index dd0a2aed8..8c4e284a1 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -63,11 +63,11 @@ public:
SetOutputSurfaceChromaVOffset = 0x1ca
};
- explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor);
+ explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
~Vic();
/// Write to the device state.
- void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments);
+ void ProcessMethod(Method method, const std::vector<u32>& arguments);
private:
void Execute();
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -12,6 +13,20 @@
namespace Tegra {
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+ command_list_hashes.resize(command_lists.size());
+
+ for (std::size_t index = 0; index < command_lists.size(); ++index) {
+ const CommandListHeader command_list_header = command_lists[index];
+ std::vector<CommandHeader> command_headers(command_list_header.size);
+ gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ command_list_hashes[index] =
+ Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+ command_list_header.size * sizeof(u32));
+ }
+}
+
DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
return false;
}
- const CommandList& command_list{dma_pushbuffer.front()};
- ASSERT_OR_EXECUTE(!command_list.empty(), {
- // Somehow the command_list is empty, in order to avoid a crash
- // We ignore it and assume its size is 0.
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- return true;
- });
- const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
- const GPUVAddr dma_get = command_list_header.addr;
-
- if (dma_pushbuffer_subindex >= command_list.size()) {
- // We've gone through the current list, remove it from the queue
- dma_pushbuffer.pop();
- dma_pushbuffer_subindex = 0;
- }
+ CommandList& command_list{dma_pushbuffer.front()};
- if (command_list_header.size == 0) {
- return true;
- }
+ ASSERT_OR_EXECUTE(
+ command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+ // Somehow the command_list is empty, in order to avoid a crash
+ // We ignore it and assume its size is 0.
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ return true;
+ });
- // Push buffer non-empty, read a word
- command_headers.resize(command_list_header.size);
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ if (command_list.prefetch_command_list.size()) {
+ // Prefetched command list from nvdrv, used for things like synchronization
+ command_headers = std::move(command_list.prefetch_command_list);
+ dma_pushbuffer.pop();
+ } else {
+ const CommandListHeader command_list_header{
+ command_list.command_lists[dma_pushbuffer_subindex]};
+ const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+ const GPUVAddr dma_get = command_list_header.addr;
+
+ if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+ // We've gone through the current list, remove it from the queue
+ dma_pushbuffer.pop();
+ dma_pushbuffer_subindex = 0;
+ }
+ if (command_list_header.size == 0) {
+ return true;
+ }
+
+ // Push buffer non-empty, read a word
+ command_headers.resize(command_list_header.size);
+ gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+
+ // Integrity check
+ const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+ command_list_header.size * sizeof(u32));
+ if (new_hash != next_hash) {
+ LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+ dma_pushbuffer.pop();
+ return true;
+ }
+ }
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
+// So the values you see in docs might be multiplied by 4.
+enum class BufferMethods : u32 {
+ BindObject = 0x0,
+ Nop = 0x2,
+ SemaphoreAddressHigh = 0x4,
+ SemaphoreAddressLow = 0x5,
+ SemaphoreSequence = 0x6,
+ SemaphoreTrigger = 0x7,
+ NotifyIntr = 0x8,
+ WrcacheFlush = 0x9,
+ Unk28 = 0xA,
+ UnkCacheFlush = 0xB,
+ RefCnt = 0x14,
+ SemaphoreAcquire = 0x1A,
+ SemaphoreRelease = 0x1B,
+ FenceValue = 0x1C,
+ FenceAction = 0x1D,
+ WaitForInterrupt = 0x1E,
+ Unk7c = 0x1F,
+ Yield = 0x20,
+ NonPullerMethods = 0x40,
+};
+
struct CommandListHeader {
union {
u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
+static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
+ SubmissionMode mode) {
+ CommandHeader result{};
+ result.method.Assign(static_cast<u32>(method));
+ result.arg_count.Assign(arg_count);
+ result.mode.Assign(mode);
+ return result;
+}
+
class GPU;
-using CommandList = std::vector<Tegra::CommandListHeader>;
+struct CommandList final {
+ CommandList() = default;
+ explicit CommandList(std::size_t size) : command_lists(size) {}
+ explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
+ : prefetch_command_list{std::move(prefetch_command_list)} {}
+
+ void RefreshIntegrityChecks(GPU& gpu);
+
+ std::vector<Tegra::CommandListHeader> command_lists;
+ std::vector<u64> command_list_hashes;
+ std::vector<Tegra::CommandHeader> prefetch_command_list;
+};
/**
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
* details on this implementation.
*/
-class DmaPusher {
+class DmaPusher final {
public:
explicit DmaPusher(Core::System& system, GPU& gpu);
~DmaPusher();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..a3c05d1b0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
+ FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -2205,6 +2206,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+ INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
- BindObject = 0x0,
- Nop = 0x2,
- SemaphoreAddressHigh = 0x4,
- SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
- WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
- RefCnt = 0x14,
- SemaphoreAcquire = 0x1A,
- SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- Unk78 = 0x1E,
- Unk7c = 0x1F,
- Yield = 0x20,
- NonPullerMethods = 0x40,
-};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
+ break;
case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
}
}
+void GPU::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case FenceOperation::Acquire:
+ WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case FenceOperation::Increment:
+ IncrementSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}",
+ static_cast<u32>(regs.fence_action.op.Value()));
+ }
+}
+
+void GPU::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
return use_nvdec;
}
+ enum class FenceOperation : u32 {
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction {
+ u32 raw;
+ BitField<0, 1, FenceOperation> op;
+ BitField<8, 24, u32> syncpoint_id;
+
+ static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+ FenceAction result{};
+ result.op.Assign(op);
+ result.syncpoint_id.Assign(syncpoint_id);
+ return {result.raw};
+ }
+ };
+
struct Regs {
static constexpr size_t NUM_REGS = 0x40;
@@ -291,10 +309,7 @@ public:
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
- union {
- BitField<4, 4, u32> operation;
- BitField<8, 8, u32> id;
- } fence_action;
+ FenceAction fence_action;
INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
@@ -342,6 +357,8 @@ protected:
private:
void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index aabd62c5c..39cc3b869 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() {
}
void AsyncShaders::AllocateWorkers() {
- // Max worker threads we should allow
- constexpr u32 MAX_THREADS = 4;
- // Deduce how many threads we can use
- const u32 threads_used = std::thread::hardware_concurrency() / 4;
- // Always allow at least 1 thread regardless of our settings
- const auto max_worker_count = std::max(1U, threads_used);
- // Don't use more than MAX_THREADS
- const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+ // Use at least one thread
+ u32 num_workers = 1;
+
+ // Deduce how many more threads we can use
+ const u32 thread_count = std::thread::hardware_concurrency();
+ if (thread_count >= 8) {
+ // Increase async workers by 1 for every 2 threads >= 8
+ num_workers += 1 + (thread_count - 8) / 2;
+ }
// If we already have workers queued, ignore
if (num_workers == worker_threads.size()) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..afef5948d 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::FCMP_RR:
- case OpCode::Id::FCMP_RC: {
+ case OpCode::Id::FCMP_RC:
+ case OpCode::Id::FCMP_IMMR: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index f264b98a0..67183e64c 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -71,11 +71,6 @@ struct Client::Impl {
return {};
}
- if (!cli->is_socket_open()) {
- LOG_ERROR(WebService, "Failed to open socket, skipping request!");
- return {};
- }
-
cli->set_connection_timeout(TIMEOUT_SECONDS);
cli->set_read_timeout(TIMEOUT_SECONDS);
cli->set_write_timeout(TIMEOUT_SECONDS);