summaryrefslogtreecommitdiffstats
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/core/core.cpp32
-rw-r--r--src/core/core.h11
-rw-r--r--src/core/core_timing.cpp9
-rw-r--r--src/core/core_timing.h8
-rw-r--r--src/core/file_sys/fsmitm_romfsbuild.cpp38
-rw-r--r--src/core/gpu_dirty_memory_manager.h122
-rw-r--r--src/core/hle/kernel/k_thread.h10
-rw-r--r--src/core/hle/kernel/svc/svc_ipc.cpp37
-rw-r--r--src/core/hle/kernel/svc/svc_synchronization.cpp41
-rw-r--r--src/core/hle/service/audio/audin_u.cpp16
-rw-r--r--src/core/hle/service/audio/audout_u.cpp20
-rw-r--r--src/core/hle/service/audio/audren_u.cpp23
-rw-r--r--src/core/hle/service/audio/hwopus.cpp9
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.cpp25
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.h5
-rw-r--r--src/core/hle/service/nvnflinger/parcel.h24
-rw-r--r--src/core/memory.cpp40
-rw-r--r--src/core/memory.h6
18 files changed, 351 insertions, 125 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b74fd0a58..9e3eb3795 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,6 +27,7 @@
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs_concat.h"
#include "core/file_sys/vfs_real.h"
+#include "core/gpu_dirty_memory_manager.h"
#include "core/hid/hid_core.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_process.h"
@@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
struct System::Impl {
explicit Impl(System& system)
: kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
- cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {}
+ cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
+ gpu_dirty_memory_write_manager{} {
+ memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
+ }
void Initialize(System& system) {
device_memory = std::make_unique<Core::DeviceMemory>();
@@ -234,6 +238,8 @@ struct System::Impl {
// Setting changes may require a full system reinitialization (e.g., disabling multicore).
ReinitializeIfNecessary(system);
+ memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
+
kernel.Initialize();
cpu_manager.Initialize();
@@ -540,6 +546,9 @@ struct System::Impl {
std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
+
+ std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+ gpu_dirty_memory_write_manager{};
};
System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) {
impl->kernel.PrepareReschedule(core_index);
}
+Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
+ const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+ return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
+ ? core
+ : Core::Hardware::NUM_CPU_CORES - 1];
+}
+
+/// Provides a constant reference to the current GPU dirty memory manager.
+const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
+ const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+ return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
+ ? core
+ : Core::Hardware::NUM_CPU_CORES - 1];
+}
+
size_t System::GetCurrentHostThreadID() const {
return impl->kernel.GetCurrentHostThreadID();
}
+void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+ for (auto& manager : impl->gpu_dirty_memory_write_manager) {
+ manager.Gather(callback);
+ }
+}
+
PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
diff --git a/src/core/core.h b/src/core/core.h
index 93afc9303..14b2f7785 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -108,9 +108,10 @@ class CpuManager;
class Debugger;
class DeviceMemory;
class ExclusiveMonitor;
-class SpeedLimiter;
+class GPUDirtyMemoryManager;
class PerfStats;
class Reporter;
+class SpeedLimiter;
class TelemetrySession;
struct PerfStatsResults;
@@ -225,6 +226,14 @@ public:
/// Prepare the core emulation for a reschedule
void PrepareReschedule(u32 core_index);
+ /// Provides a reference to the current GPU dirty memory manager.
+ [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
+
+ /// Provides a constant reference to the current GPU dirty memory manager.
+ [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
+
+ void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+
[[nodiscard]] size_t GetCurrentHostThreadID() const;
/// Gets and resets core performance statistics
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 4f0a3f8ea..e6112a3c9 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -253,9 +253,6 @@ void CoreTiming::ThreadLoop() {
auto wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) {
#ifdef _WIN32
- const auto timer_resolution_ns =
- Common::Windows::GetCurrentTimerResolution().count();
-
while (!paused && !event.IsSet() && wait_time > 0) {
wait_time = *next_time - GetGlobalTimeNs().count();
@@ -316,4 +313,10 @@ std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
}
+#ifdef _WIN32
+void CoreTiming::SetTimerResolutionNs(std::chrono::nanoseconds ns) {
+ timer_resolution_ns = ns.count();
+}
+#endif
+
} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 10db1de55..5bca1c78d 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -131,6 +131,10 @@ public:
/// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
std::optional<s64> Advance();
+#ifdef _WIN32
+ void SetTimerResolutionNs(std::chrono::nanoseconds ns);
+#endif
+
private:
struct Event;
@@ -143,6 +147,10 @@ private:
s64 global_timer = 0;
+#ifdef _WIN32
+ s64 timer_resolution_ns;
+#endif
+
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp
index 1ff83c08c..e39c7b62b 100644
--- a/src/core/file_sys/fsmitm_romfsbuild.cpp
+++ b/src/core/file_sys/fsmitm_romfsbuild.cpp
@@ -105,19 +105,11 @@ static u64 romfs_get_hash_table_count(u64 num_entries) {
return count;
}
-void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir,
+void RomFSBuildContext::VisitDirectory(VirtualDir romfs_dir, VirtualDir ext_dir,
std::shared_ptr<RomFSBuildDirectoryContext> parent) {
std::vector<std::shared_ptr<RomFSBuildDirectoryContext>> child_dirs;
- VirtualDir dir;
-
- if (parent->path_len == 0) {
- dir = root_romfs;
- } else {
- dir = root_romfs->GetDirectoryRelative(parent->path);
- }
-
- const auto entries = dir->GetEntries();
+ const auto entries = romfs_dir->GetEntries();
for (const auto& kv : entries) {
if (kv.second == VfsEntryType::Directory) {
@@ -127,7 +119,7 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir
child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size());
child->path = parent->path + "/" + kv.first;
- if (ext_dir != nullptr && ext_dir->GetFileRelative(child->path + ".stub") != nullptr) {
+ if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) {
continue;
}
@@ -144,17 +136,17 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir
child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size());
child->path = parent->path + "/" + kv.first;
- if (ext_dir != nullptr && ext_dir->GetFileRelative(child->path + ".stub") != nullptr) {
+ if (ext_dir != nullptr && ext_dir->GetFile(kv.first + ".stub") != nullptr) {
continue;
}
// Sanity check on path_len
ASSERT(child->path_len < FS_MAX_PATH);
- child->source = root_romfs->GetFileRelative(child->path);
+ child->source = romfs_dir->GetFile(kv.first);
if (ext_dir != nullptr) {
- if (const auto ips = ext_dir->GetFileRelative(child->path + ".ips")) {
+ if (const auto ips = ext_dir->GetFile(kv.first + ".ips")) {
if (auto patched = PatchIPS(child->source, ips)) {
child->source = std::move(patched);
}
@@ -168,23 +160,27 @@ void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs, VirtualDir ext_dir
}
for (auto& child : child_dirs) {
- this->VisitDirectory(root_romfs, ext_dir, child);
+ auto subdir_name = std::string_view(child->path).substr(child->cur_path_ofs);
+ auto child_romfs_dir = romfs_dir->GetSubdirectory(subdir_name);
+ auto child_ext_dir = ext_dir != nullptr ? ext_dir->GetSubdirectory(subdir_name) : nullptr;
+ this->VisitDirectory(child_romfs_dir, child_ext_dir, child);
}
}
bool RomFSBuildContext::AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
std::shared_ptr<RomFSBuildDirectoryContext> dir_ctx) {
// Check whether it's already in the known directories.
- const auto existing = directories.find(dir_ctx->path);
- if (existing != directories.end())
+ const auto [it, is_new] = directories.emplace(dir_ctx->path, nullptr);
+ if (!is_new) {
return false;
+ }
// Add a new directory.
num_dirs++;
dir_table_size +=
sizeof(RomFSDirectoryEntry) + Common::AlignUp(dir_ctx->path_len - dir_ctx->cur_path_ofs, 4);
dir_ctx->parent = parent_dir_ctx;
- directories.emplace(dir_ctx->path, dir_ctx);
+ it->second = dir_ctx;
return true;
}
@@ -192,8 +188,8 @@ bool RomFSBuildContext::AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext>
bool RomFSBuildContext::AddFile(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
std::shared_ptr<RomFSBuildFileContext> file_ctx) {
// Check whether it's already in the known files.
- const auto existing = files.find(file_ctx->path);
- if (existing != files.end()) {
+ const auto [it, is_new] = files.emplace(file_ctx->path, nullptr);
+ if (!is_new) {
return false;
}
@@ -202,7 +198,7 @@ bool RomFSBuildContext::AddFile(std::shared_ptr<RomFSBuildDirectoryContext> pare
file_table_size +=
sizeof(RomFSFileEntry) + Common::AlignUp(file_ctx->path_len - file_ctx->cur_path_ofs, 4);
file_ctx->parent = parent_dir_ctx;
- files.emplace(file_ctx->path, file_ctx);
+ it->second = file_ctx;
return true;
}
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
new file mode 100644
index 000000000..9687531e8
--- /dev/null
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -0,0 +1,122 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <bit>
+#include <functional>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include "core/memory.h"
+
+namespace Core {
+
+class GPUDirtyMemoryManager {
+public:
+ GPUDirtyMemoryManager() : current{default_transform} {
+ back_buffer.reserve(256);
+ front_buffer.reserve(256);
+ }
+
+ ~GPUDirtyMemoryManager() = default;
+
+ void Collect(VAddr address, size_t size) {
+ TransformAddress t = BuildTransform(address, size);
+ TransformAddress tmp, original;
+ do {
+ tmp = current.load(std::memory_order_acquire);
+ original = tmp;
+ if (tmp.address != t.address) {
+ if (IsValid(tmp.address)) {
+ std::scoped_lock lk(guard);
+ back_buffer.emplace_back(tmp);
+ current.exchange(t, std::memory_order_relaxed);
+ return;
+ }
+ tmp.address = t.address;
+ tmp.mask = 0;
+ }
+ if ((tmp.mask | t.mask) == tmp.mask) {
+ return;
+ }
+ tmp.mask |= t.mask;
+ } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
+ std::memory_order_relaxed));
+ }
+
+ void Gather(std::function<void(VAddr, size_t)>& callback) {
+ {
+ std::scoped_lock lk(guard);
+ TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
+ front_buffer.swap(back_buffer);
+ if (IsValid(t.address)) {
+ front_buffer.emplace_back(t);
+ }
+ }
+ for (auto& transform : front_buffer) {
+ size_t offset = 0;
+ u64 mask = transform.mask;
+ while (mask != 0) {
+ const size_t empty_bits = std::countr_zero(mask);
+ offset += empty_bits << align_bits;
+ mask = mask >> empty_bits;
+
+ const size_t continuous_bits = std::countr_one(mask);
+ callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
+ continuous_bits << align_bits);
+ mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
+ offset += continuous_bits << align_bits;
+ }
+ }
+ front_buffer.clear();
+ }
+
+private:
+ struct alignas(8) TransformAddress {
+ u32 address;
+ u32 mask;
+ };
+
+ constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
+ constexpr static size_t page_size = 1ULL << page_bits;
+ constexpr static size_t page_mask = page_size - 1;
+
+ constexpr static size_t align_bits = 6U;
+ constexpr static size_t align_size = 1U << align_bits;
+ constexpr static size_t align_mask = align_size - 1;
+ constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
+
+ bool IsValid(VAddr address) {
+ return address < (1ULL << 39);
+ }
+
+ template <typename T>
+ T CreateMask(size_t top_bit, size_t minor_bit) {
+ T mask = ~T(0);
+ mask <<= (sizeof(T) * 8 - top_bit);
+ mask >>= (sizeof(T) * 8 - top_bit);
+ mask >>= minor_bit;
+ mask <<= minor_bit;
+ return mask;
+ }
+
+ TransformAddress BuildTransform(VAddr address, size_t size) {
+ const size_t minor_address = address & page_mask;
+ const size_t minor_bit = minor_address >> align_bits;
+ const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
+ TransformAddress result{};
+ result.address = static_cast<u32>(address >> page_bits);
+ result.mask = CreateMask<u32>(top_bit, minor_bit);
+ return result;
+ }
+
+ std::atomic<TransformAddress> current{};
+ std::mutex guard;
+ std::vector<TransformAddress> back_buffer;
+ std::vector<TransformAddress> front_buffer;
+};
+
+} // namespace Core
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index dd662b3f8..d178c2453 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -338,6 +338,15 @@ public:
return m_parent != nullptr;
}
+ std::span<KSynchronizationObject*> GetSynchronizationObjectBuffer() {
+ return m_sync_object_buffer.sync_objects;
+ }
+
+ std::span<Handle> GetHandleBuffer() {
+ return {m_sync_object_buffer.handles.data() + Svc::ArgumentHandleCountMax,
+ Svc::ArgumentHandleCountMax};
+ }
+
u16 GetUserDisableCount() const;
void SetInterruptFlag();
void ClearInterruptFlag();
@@ -855,6 +864,7 @@ private:
u32* m_light_ipc_data{};
KProcessAddress m_tls_address{};
KLightLock m_activity_pause_lock;
+ SyncObjectBuffer m_sync_object_buffer{};
s64 m_schedule_count{};
s64 m_last_scheduled_tick{};
std::array<QueueEntry, Core::Hardware::NUM_CPU_CORES> m_per_core_priority_queue_entry{};
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index 60247df2e..bb94f6934 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -38,22 +38,31 @@ Result SendAsyncRequestWithUserBuffer(Core::System& system, Handle* out_event_ha
Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_addr, s32 num_handles,
Handle reply_target, s64 timeout_ns) {
+ // Ensure number of handles is valid.
+ R_UNLESS(0 <= num_handles && num_handles <= ArgumentHandleCountMax, ResultOutOfRange);
+
+ // Get the synchronization context.
auto& kernel = system.Kernel();
auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
-
- R_UNLESS(0 <= num_handles && num_handles <= ArgumentHandleCountMax, ResultOutOfRange);
- R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange(
- handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
- ResultInvalidPointer);
-
- std::array<Handle, Svc::ArgumentHandleCountMax> handles;
- GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles);
-
- // Convert handle list to object table.
- std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
- R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(),
- num_handles),
- ResultInvalidHandle);
+ auto objs = GetCurrentThread(kernel).GetSynchronizationObjectBuffer();
+ auto handles = GetCurrentThread(kernel).GetHandleBuffer();
+
+ // Copy user handles.
+ if (num_handles > 0) {
+ // Ensure we can try to get the handles.
+ R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange(
+ handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
+ ResultInvalidPointer);
+
+ // Get the handles.
+ GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(),
+ sizeof(Handle) * num_handles);
+
+ // Convert the handles to objects.
+ R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(
+ objs.data(), handles.data(), num_handles),
+ ResultInvalidHandle);
+ }
// Ensure handles are closed when we're done.
SCOPE_EXIT({
diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp
index 53df5bcd8..f02d03f30 100644
--- a/src/core/hle/kernel/svc/svc_synchronization.cpp
+++ b/src/core/hle/kernel/svc/svc_synchronization.cpp
@@ -47,21 +47,35 @@ Result ResetSignal(Core::System& system, Handle handle) {
R_THROW(ResultInvalidHandle);
}
-static Result WaitSynchronization(Core::System& system, int32_t* out_index, const Handle* handles,
- int32_t num_handles, int64_t timeout_ns) {
+/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
+Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_handles,
+ int32_t num_handles, int64_t timeout_ns) {
+ LOG_TRACE(Kernel_SVC, "called user_handles={:#x}, num_handles={}, timeout_ns={}", user_handles,
+ num_handles, timeout_ns);
+
// Ensure number of handles is valid.
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
// Get the synchronization context.
auto& kernel = system.Kernel();
auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
- std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
+ auto objs = GetCurrentThread(kernel).GetSynchronizationObjectBuffer();
+ auto handles = GetCurrentThread(kernel).GetHandleBuffer();
// Copy user handles.
if (num_handles > 0) {
+ // Ensure we can try to get the handles.
+ R_UNLESS(GetCurrentMemory(kernel).IsValidVirtualAddressRange(
+ user_handles, static_cast<u64>(sizeof(Handle) * num_handles)),
+ ResultInvalidPointer);
+
+ // Get the handles.
+ GetCurrentMemory(kernel).ReadBlock(user_handles, handles.data(),
+ sizeof(Handle) * num_handles);
+
// Convert the handles to objects.
- R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles,
- num_handles),
+ R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(
+ objs.data(), handles.data(), num_handles),
ResultInvalidHandle);
}
@@ -80,23 +94,6 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
R_RETURN(res);
}
-/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
-Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_handles,
- int32_t num_handles, int64_t timeout_ns) {
- LOG_TRACE(Kernel_SVC, "called user_handles={:#x}, num_handles={}, timeout_ns={}", user_handles,
- num_handles, timeout_ns);
-
- // Ensure number of handles is valid.
- R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
- std::array<Handle, Svc::ArgumentHandleCountMax> handles;
- if (num_handles > 0) {
- GetCurrentMemory(system.Kernel())
- .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle));
- }
-
- R_RETURN(WaitSynchronization(system, out_index, handles.data(), num_handles, timeout_ns));
-}
-
/// Resumes a thread waiting on WaitSynchronization
Result CancelSynchronization(Core::System& system, Handle handle) {
LOG_TRACE(Kernel_SVC, "called handle=0x{:X}", handle);
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index c8d574993..526a39130 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -5,7 +5,7 @@
#include "audio_core/renderer/audio_device.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
-#include "common/settings.h"
+#include "common/scratch_buffer.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
@@ -124,12 +124,15 @@ private:
void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
- tmp_buffer.resize_destructive(write_buffer_size);
- tmp_buffer[0] = 0;
+ released_buffer.resize_destructive(write_buffer_size);
+ released_buffer[0] = 0;
- const auto count = impl->GetReleasedBuffers(tmp_buffer);
+ const auto count = impl->GetReleasedBuffers(released_buffer);
- ctx.WriteBuffer(tmp_buffer);
+ LOG_TRACE(Service_Audio, "called. Session {} released {} buffers",
+ impl->GetSystem().GetSessionId(), count);
+
+ ctx.WriteBuffer(released_buffer);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
@@ -155,7 +158,6 @@ private:
LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count);
IPC::ResponseBuilder rb{ctx, 3};
-
rb.Push(ResultSuccess);
rb.Push(buffer_count);
}
@@ -195,7 +197,7 @@ private:
KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioIn::In> impl;
- Common::ScratchBuffer<u64> tmp_buffer;
+ Common::ScratchBuffer<u64> released_buffer;
};
AudInU::AudInU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 032c8c11f..23f84a29f 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -9,6 +9,7 @@
#include "audio_core/renderer/audio_device.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
+#include "common/scratch_buffer.h"
#include "common/string_util.h"
#include "common/swap.h"
#include "core/core.h"
@@ -102,8 +103,8 @@ private:
AudioOutBuffer buffer{};
std::memcpy(&buffer, in_buffer.data(), sizeof(AudioOutBuffer));
- [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
- LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}", sessionid, tag);
+ LOG_TRACE(Service_Audio, "called. Session {} Appending buffer {:08X}",
+ impl->GetSystem().GetSessionId(), tag);
auto result = impl->AppendBuffer(buffer, tag);
@@ -123,12 +124,15 @@ private:
void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
- tmp_buffer.resize_destructive(write_buffer_size);
- tmp_buffer[0] = 0;
+ released_buffer.resize_destructive(write_buffer_size);
+ released_buffer[0] = 0;
- const auto count = impl->GetReleasedBuffers(tmp_buffer);
+ const auto count = impl->GetReleasedBuffers(released_buffer);
- ctx.WriteBuffer(tmp_buffer);
+ ctx.WriteBuffer(released_buffer);
+
+ LOG_TRACE(Service_Audio, "called. Session {} released {} buffers",
+ impl->GetSystem().GetSessionId(), count);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
@@ -154,7 +158,6 @@ private:
LOG_DEBUG(Service_Audio, "called. Buffer count={}", buffer_count);
IPC::ResponseBuilder rb{ctx, 3};
-
rb.Push(ResultSuccess);
rb.Push(buffer_count);
}
@@ -165,7 +168,6 @@ private:
LOG_DEBUG(Service_Audio, "called. Played samples={}", samples_played);
IPC::ResponseBuilder rb{ctx, 4};
-
rb.Push(ResultSuccess);
rb.Push(samples_played);
}
@@ -205,7 +207,7 @@ private:
KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioOut::Out> impl;
- Common::ScratchBuffer<u64> tmp_buffer;
+ Common::ScratchBuffer<u64> released_buffer;
};
AudOutU::AudOutU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 12845c23a..003870176 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -15,6 +15,7 @@
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/polyfill_ranges.h"
+#include "common/scratch_buffer.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
@@ -119,23 +120,23 @@ private:
auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
if (is_buffer_b) {
const auto buffersB{ctx.BufferDescriptorB()};
- tmp_output.resize_destructive(buffersB[0].Size());
- tmp_performance.resize_destructive(buffersB[1].Size());
+ output_buffer.resize_destructive(buffersB[0].Size());
+ performance_buffer.resize_destructive(buffersB[1].Size());
} else {
const auto buffersC{ctx.BufferDescriptorC()};
- tmp_output.resize_destructive(buffersC[0].Size());
- tmp_performance.resize_destructive(buffersC[1].Size());
+ output_buffer.resize_destructive(buffersC[0].Size());
+ performance_buffer.resize_destructive(buffersC[1].Size());
}
- auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
+ auto result = impl->RequestUpdate(input, performance_buffer, output_buffer);
if (result.IsSuccess()) {
if (is_buffer_b) {
- ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
- ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
+ ctx.WriteBufferB(output_buffer.data(), output_buffer.size(), 0);
+ ctx.WriteBufferB(performance_buffer.data(), performance_buffer.size(), 1);
} else {
- ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
- ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
+ ctx.WriteBufferC(output_buffer.data(), output_buffer.size(), 0);
+ ctx.WriteBufferC(performance_buffer.data(), performance_buffer.size(), 1);
}
} else {
LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@@ -233,8 +234,8 @@ private:
Kernel::KEvent* rendered_event;
Manager& manager;
std::unique_ptr<Renderer> impl;
- Common::ScratchBuffer<u8> tmp_output;
- Common::ScratchBuffer<u8> tmp_performance;
+ Common::ScratchBuffer<u8> output_buffer;
+ Common::ScratchBuffer<u8> performance_buffer;
};
class IAudioDevice final : public ServiceFramework<IAudioDevice> {
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index c835f6cb7..fa77007f3 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -11,6 +11,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/scratch_buffer.h"
#include "core/hle/service/audio/hwopus.h"
#include "core/hle/service/ipc_helpers.h"
@@ -68,13 +69,13 @@ private:
ExtraBehavior extra_behavior) {
u32 consumed = 0;
u32 sample_count = 0;
- tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
+ samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
if (extra_behavior == ExtraBehavior::ResetContext) {
ResetDecoderContext();
}
- if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
+ if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
LOG_ERROR(Audio, "Failed to decode opus data");
IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code
@@ -90,7 +91,7 @@ private:
if (performance) {
rb.Push<u64>(*performance);
}
- ctx.WriteBuffer(tmp_samples);
+ ctx.WriteBuffer(samples);
}
bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
@@ -154,7 +155,7 @@ private:
OpusDecoderPtr decoder;
u32 sample_rate;
u32 channel_count;
- Common::ScratchBuffer<opus_int16> tmp_samples;
+ Common::ScratchBuffer<opus_int16> samples;
};
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 348207e25..c8a880e84 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -2,7 +2,6 @@
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
-#include <cinttypes>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
@@ -63,12 +62,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
}
// Check device
- tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
+ output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
const auto input_buffer = ctx.ReadBuffer(0);
- const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
+ const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
if (command.is_out != 0) {
- ctx.WriteBuffer(tmp_output);
+ ctx.WriteBuffer(output_buffer);
}
IPC::ResponseBuilder rb{ctx, 3};
@@ -90,12 +89,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
const auto input_buffer = ctx.ReadBuffer(0);
const auto input_inlined_buffer = ctx.ReadBuffer(1);
- tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
+ output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
const auto nv_result =
- nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
+ nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
if (command.is_out != 0) {
- ctx.WriteBuffer(tmp_output);
+ ctx.WriteBuffer(output_buffer);
}
IPC::ResponseBuilder rb{ctx, 3};
@@ -116,12 +115,14 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
}
const auto input_buffer = ctx.ReadBuffer(0);
- tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
- tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
- const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
+ output_buffer.resize_destructive(ctx.GetWriteBufferSize(0));
+ inline_output_buffer.resize_destructive(ctx.GetWriteBufferSize(1));
+
+ const auto nv_result =
+ nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, inline_output_buffer);
if (command.is_out != 0) {
- ctx.WriteBuffer(tmp_output, 0);
- ctx.WriteBuffer(tmp_output_inline, 1);
+ ctx.WriteBuffer(output_buffer, 0);
+ ctx.WriteBuffer(inline_output_buffer, 1);
}
IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 4b593ff90..6e98115dc 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -4,6 +4,7 @@
#pragma once
#include <memory>
+
#include "common/scratch_buffer.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/service.h"
@@ -34,8 +35,8 @@ private:
u64 pid{};
bool is_initialized{};
- Common::ScratchBuffer<u8> tmp_output;
- Common::ScratchBuffer<u8> tmp_output_inline;
+ Common::ScratchBuffer<u8> output_buffer;
+ Common::ScratchBuffer<u8> inline_output_buffer;
};
} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h
index 23ba315a0..e2c9bbd50 100644
--- a/src/core/hle/service/nvnflinger/parcel.h
+++ b/src/core/hle/service/nvnflinger/parcel.h
@@ -6,6 +6,7 @@
#include <memory>
#include <span>
#include <vector>
+
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
@@ -148,9 +149,9 @@ public:
this->WriteImpl(0U, m_object_buffer);
}
- std::vector<u8> Serialize() const {
- std::vector<u8> output_buffer(sizeof(ParcelHeader) + m_data_buffer.size() +
- m_object_buffer.size());
+ std::span<u8> Serialize() {
+ m_output_buffer.resize(sizeof(ParcelHeader) + m_data_buffer.size() +
+ m_object_buffer.size());
ParcelHeader header{};
header.data_size = static_cast<u32>(m_data_buffer.size());
@@ -158,17 +159,17 @@ public:
header.objects_size = static_cast<u32>(m_object_buffer.size());
header.objects_offset = header.data_offset + header.data_size;
- std::memcpy(output_buffer.data(), &header, sizeof(header));
- std::ranges::copy(m_data_buffer, output_buffer.data() + header.data_offset);
- std::ranges::copy(m_object_buffer, output_buffer.data() + header.objects_offset);
+ std::memcpy(m_output_buffer.data(), &header, sizeof(ParcelHeader));
+ std::ranges::copy(m_data_buffer, m_output_buffer.data() + header.data_offset);
+ std::ranges::copy(m_object_buffer, m_output_buffer.data() + header.objects_offset);
- return output_buffer;
+ return m_output_buffer;
}
private:
- template <typename T>
+ template <typename T, size_t BufferSize>
requires(std::is_trivially_copyable_v<T>)
- void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
+ void WriteImpl(const T& val, boost::container::small_vector<u8, BufferSize>& buffer) {
const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
const size_t old_size = buffer.size();
buffer.resize(old_size + aligned_size);
@@ -177,8 +178,9 @@ private:
}
private:
- boost::container::small_vector<u8, 0x200> m_data_buffer;
- boost::container::small_vector<u8, 0x200> m_object_buffer;
+ boost::container::small_vector<u8, 0x1B0> m_data_buffer;
+ boost::container::small_vector<u8, 0x40> m_object_buffer;
+ boost::container::small_vector<u8, 0x200> m_output_buffer;
};
} // namespace Service::android
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 514ba0d66..257406f09 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,6 +3,7 @@
#include <algorithm>
#include <cstring>
+#include <span>
#include "common/assert.h"
#include "common/atomic_ops.h"
@@ -13,6 +14,7 @@
#include "common/swap.h"
#include "core/core.h"
#include "core/device_memory.h"
+#include "core/gpu_dirty_memory_manager.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
@@ -678,7 +680,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
GetInteger(vaddr), static_cast<u64>(data));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+ [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
@@ -692,7 +694,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+ [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -707,7 +709,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
},
- [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); });
+ [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -717,7 +719,7 @@ struct Memory::Impl {
void HandleRasterizerDownload(VAddr address, size_t size) {
const size_t core = system.GetCurrentHostThreadID();
- auto& current_area = rasterizer_areas[core];
+ auto& current_area = rasterizer_read_areas[core];
const VAddr end_address = address + size;
if (current_area.start_address <= address && end_address <= current_area.end_address)
[[likely]] {
@@ -726,9 +728,31 @@ struct Memory::Impl {
current_area = system.GPU().OnCPURead(address, size);
}
- Common::PageTable* current_page_table = nullptr;
- std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{};
+ void HandleRasterizerWrite(VAddr address, size_t size) { // Invoked from the Write/WriteExclusive callbacks of this struct; forwards the written range to gpu_dirty_managers[core].Collect when required.
+ const size_t core = system.GetCurrentHostThreadID(); // Used below as the index into both per-thread arrays.
+ auto& current_area = rasterizer_write_areas[core]; // NOTE(review): array holds NUM_CPU_CORES entries and the index is unchecked - confirm the host thread ID is always below NUM_CPU_CORES.
+ VAddr subaddress = address >> YUZU_PAGEBITS; // Page index of the write.
+ bool do_collection = current_area.last_address == subaddress; // Same page as the last accepted write: skip the GPU query.
+ if (!do_collection) [[unlikely]] {
+ do_collection = system.GPU().OnCPUWrite(address, size); // The returned bool decides whether this range gets collected.
+ if (!do_collection) {
+ return; // Nothing to collect; last_address is left unchanged.
+ }
+ current_area.last_address = subaddress; // Remember the page so later writes to it take the fast path above.
+ }
+ gpu_dirty_managers[core].Collect(address, size); // NOTE(review): the span is only assigned through SetGPUDirtyManagers - presumably before any write reaches this point; confirm.
+ }
+
+ struct GPUDirtyState { // Element type of rasterizer_write_areas; read and updated by HandleRasterizerWrite.
+ VAddr last_address; // Page index (address >> YUZU_PAGEBITS) of the last write for which OnCPUWrite returned true; no initializer here, the array member is declared with {}.
+ };
+
Core::System& system;
+ Common::PageTable* current_page_table = nullptr;
+ std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
+ rasterizer_read_areas{};
+ std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+ std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
};
Memory::Memory(Core::System& system_) : system{system_} {
@@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
}
+void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { // Stores the span in impl, where HandleRasterizerWrite indexes it by host thread ID.
+ impl->gpu_dirty_managers = managers; // Copies only the span (a non-owning view), not the managers - NOTE(review): confirm the referenced managers outlive this Memory instance.
+}
+
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
}
diff --git a/src/core/memory.h b/src/core/memory.h
index 72a0be813..ea01824f8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,6 +5,7 @@
#include <cstddef>
#include <memory>
+#include <span>
#include <string>
#include "common/typed_address.h"
#include "core/hle/result.h"
@@ -15,7 +16,8 @@ struct PageTable;
namespace Core {
class System;
-}
+class GPUDirtyMemoryManager;
+} // namespace Core
namespace Kernel {
class PhysicalMemory;
@@ -458,6 +460,8 @@ public:
*/
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
+ void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); // Takes a span of GPUDirtyMemoryManager objects; the span is a view, so the caller keeps ownership of the managers.
+
private:
Core::System& system;