Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h            | 47
-rw-r--r--  src/video_core/engines/engine_upload.cpp              |  7
-rw-r--r--  src/video_core/engines/engine_upload.h                |  8
-rw-r--r--  src/video_core/engines/kepler_compute.cpp             |  1
-rw-r--r--  src/video_core/engines/kepler_memory.cpp              |  4
-rw-r--r--  src/video_core/engines/kepler_memory.h                |  7
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp                 |  1
-rw-r--r--  src/video_core/engines/maxwell_3d.h                   |  3
-rw-r--r--  src/video_core/gpu.cpp                                |  8
-rw-r--r--  src/video_core/memory_manager.h                       |  4
-rw-r--r--  src/video_core/rasterizer_interface.h                 |  3
-rw-r--r--  src/video_core/renderer_base.h                        |  8
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp      | 22
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h        |  2
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h  | 50
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp      | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h        |  2
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp       |  6
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h         |  4
-rw-r--r--  src/video_core/shader_cache.cpp                       |  4
-rw-r--r--  src/video_core/shader_cache.h                         |  3
21 files changed, 185 insertions(+), 31 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 048dba4f3..fa26eb8b0 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -131,6 +131,8 @@ public:
void DownloadMemory(VAddr cpu_addr, u64 size);
+ bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
void DisableGraphicsUniformBuffer(size_t stage, u32 index);
@@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
+ const bool is_accuracy_normal =
+ Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
@@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
buffer.ForEachDownloadRangeAndClear(
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ if (is_accuracy_normal) {
+ return;
+ }
const VAddr buffer_addr = buffer.CpuAddr();
const auto add_download = [&](VAddr start, VAddr end) {
const u64 new_offset = start - buffer_addr;
@@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
const IntervalType base_interval{cpu_addr, cpu_addr + size};
common_ranges.add(base_interval);
- const bool is_accuracy_high =
- Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
- if (!is_async && !is_accuracy_high) {
+ if (!is_async) {
return;
}
uncommitted_ranges.add(base_interval);
@@ -1644,6 +1649,42 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
}
template <class P>
+bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
+ std::span<u8> inlined_buffer) {
+ const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
+ if (!is_dirty) {
+ return false;
+ }
+ if (!IsRegionGpuModified(dest_address, copy_size)) {
+ return false;
+ }
+
+ const IntervalType subtract_interval{dest_address, dest_address + copy_size};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
+
+ BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
+ auto& buffer = slot_buffers[buffer_id];
+ SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
+
+ if constexpr (USE_MEMORY_MAPS) {
+ std::array copies{BufferCopy{
+ .src_offset = 0,
+ .dst_offset = buffer.Offset(dest_address),
+ .size = copy_size,
+ }};
+ auto upload_staging = runtime.UploadStagingBuffer(copy_size);
+ u8* const src_pointer = upload_staging.mapped_span.data();
+ std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ } else {
+ buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
+ }
+
+ return true;
+}
+
+template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
}
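
The new InlineMemory() entry point only takes over when the destination range is both registered in the cache and marked as GPU-modified; otherwise it returns false and the caller is expected to fall back to a plain WriteMemory() invalidation, as the rasterizer hunks further below do. A minimal sketch of that caller contract, with a hypothetical helper name and assuming the patched BufferCache interface:

// Sketch only (hypothetical helper): forward inline data to the cache and
// fall back to a CPU-side write notification when the range is not resident.
template <class P>
void UploadInline(BufferCache<P>& buffer_cache, VAddr cpu_addr, std::span<u8> data) {
    std::scoped_lock lock{buffer_cache.mutex};
    if (!buffer_cache.InlineMemory(cpu_addr, data.size(), data)) {
        buffer_cache.WriteMemory(cpu_addr, data.size());
    }
}
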
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 71d7e1473..351b110fe 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
@@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_)
State::~State() = default;
+void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
void State::ProcessExec(const bool is_linear_) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
@@ -32,7 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
}
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
- memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+ rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 1c7f1effa..c9c5ec8c3 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -12,6 +12,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines::Upload {
struct Registers {
@@ -60,6 +64,9 @@ public:
void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
private:
u32 write_offset = 0;
u32 copy_size = 0;
@@ -68,6 +75,7 @@ private:
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
};
} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 492b4c5a3..5a1c12076 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default;
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
+ upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 560551157..8aed16caa 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
KeplerMemory::~KeplerMemory() = default;
+void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ upload_state.BindRasterizer(rasterizer_);
+}
+
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerMemory register, increase the size of the Regs structure");
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 0d8ea09a9..949e2fae1 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -22,6 +22,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
/**
@@ -38,6 +42,9 @@ public:
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
~KeplerMemory() override;
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c38ebd670..5d6d217bb 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default;
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
+ upload_state.BindRasterizer(rasterizer_);
}
void Maxwell3D::InitializeRegisterDefaults() {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f22342dfb..dc9df6c8b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1557,7 +1557,8 @@ private:
static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct CBDataState {
- std::array<std::array<u32, 0x4000>, 16> buffer;
+ static constexpr size_t inline_size = 0x4000;
+ std::array<std::array<u32, inline_size>, 16> buffer;
u32 current{null_cb_data};
u32 id{null_cb_data};
u32 start_pos{};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 705765c99..ba9ba082f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -59,6 +59,7 @@ struct GPU::Impl {
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
+ kepler_memory->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
}
@@ -502,8 +503,13 @@ struct GPU::Impl {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
+ break;
case BufferMethods::UnkCacheFlush:
+ rasterizer->SyncGuestHost();
+ break;
case BufferMethods::WrcacheFlush:
+ rasterizer->SignalReference();
+ break;
case BufferMethods::FenceValue:
break;
case BufferMethods::RefCnt:
@@ -513,7 +519,7 @@ struct GPU::Impl {
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
- ProcessWaitForInterruptMethod();
+ rasterizer->WaitForIdle();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
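
Previously UnkCacheFlush and WrcacheFlush fell through to the shared no-op break below FenceValue, and WaitForInterrupt went through ProcessWaitForInterruptMethod(); the hunks above give each method a direct rasterizer call. The resulting dispatch, paraphrased rather than quoted verbatim:

// Paraphrased shape of the dispatch after this patch.
switch (method) {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
    break;                              // still no-ops
case BufferMethods::UnkCacheFlush:
    rasterizer->SyncGuestHost();        // was a fallthrough no-op
    break;
case BufferMethods::WrcacheFlush:
    rasterizer->SignalReference();      // was a fallthrough no-op
    break;
case BufferMethods::WaitForInterrupt:
    rasterizer->WaitForIdle();          // was ProcessWaitForInterruptMethod()
    break;
default:
    break;
}
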
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 38d8d9d74..61bfe47c7 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -143,6 +143,8 @@ public:
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
+ void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+
private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
@@ -153,8 +155,6 @@ private:
void TryLockPage(PageEntry page_entry, std::size_t size);
void TryUnlockPage(PageEntry page_entry, std::size_t size);
- void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
-
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
bool is_safe) const;
void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b094fc064..1f1f12291 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -123,6 +123,9 @@ public:
[[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
+ virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) = 0;
+
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index bb204454e..c5f974080 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -5,9 +5,10 @@
#pragma once
#include <atomic>
+#include <functional>
#include <memory>
-#include <optional>
+#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "video_core/gpu.h"
@@ -28,8 +29,11 @@ struct RendererSettings {
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
-class RendererBase : NonCopyable {
+class RendererBase {
public:
+ YUZU_NON_COPYABLE(RendererBase);
+ YUZU_NON_MOVEABLE(RendererBase);
+
explicit RendererBase(Core::Frontend::EmuWindow& window,
std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
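
The NonCopyable base is replaced here by the YUZU_NON_COPYABLE/YUZU_NON_MOVEABLE macros pulled in via common/common_funcs.h. Their definitions are not part of this diff; the assumed expansion, shown only for context, is explicitly deleted copy and move special members:

// Assumed expansion (not shown in this diff) of the macros from
// common/common_funcs.h used in renderer_base.h and gl_resource_manager.h.
#define YUZU_NON_COPYABLE(cls)                                                 \
    cls(const cls&) = delete;                                                  \
    cls& operator=(const cls&) = delete

#define YUZU_NON_MOVEABLE(cls)                                                 \
    cls(cls&&) = delete;                                                       \
    cls& operator=(cls&&) = delete
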
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9b516c64f..142412a8e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
return accelerate_dma;
}
+void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) {
+ auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ if (!cpu_addr) [[unlikely]] {
+ gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ return;
+ }
+ gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ {
+ std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+ buffer_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ }
+ {
+ std::scoped_lock lock_texture{texture_cache.mutex};
+ texture_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ shader_cache.InvalidateRegion(*cpu_addr, copy_size);
+ query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d0397b745..98f6fd342 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -106,6 +106,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+ void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index b2d5bfd3b..84e07f8bd 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -7,12 +7,14 @@
#include <string_view>
#include <utility>
#include <glad/glad.h>
-#include "common/common_types.h"
+#include "common/common_funcs.h"
namespace OpenGL {
-class OGLRenderbuffer : private NonCopyable {
+class OGLRenderbuffer final {
public:
+ YUZU_NON_COPYABLE(OGLRenderbuffer);
+
OGLRenderbuffer() = default;
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -36,8 +38,10 @@ public:
GLuint handle = 0;
};
-class OGLTexture : private NonCopyable {
+class OGLTexture final {
public:
+ YUZU_NON_COPYABLE(OGLTexture);
+
OGLTexture() = default;
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -61,8 +65,10 @@ public:
GLuint handle = 0;
};
-class OGLTextureView : private NonCopyable {
+class OGLTextureView final {
public:
+ YUZU_NON_COPYABLE(OGLTextureView);
+
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -86,8 +92,10 @@ public:
GLuint handle = 0;
};
-class OGLSampler : private NonCopyable {
+class OGLSampler final {
public:
+ YUZU_NON_COPYABLE(OGLSampler);
+
OGLSampler() = default;
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -111,8 +119,10 @@ public:
GLuint handle = 0;
};
-class OGLShader : private NonCopyable {
+class OGLShader final {
public:
+ YUZU_NON_COPYABLE(OGLShader);
+
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -132,8 +142,10 @@ public:
GLuint handle = 0;
};
-class OGLProgram : private NonCopyable {
+class OGLProgram final {
public:
+ YUZU_NON_COPYABLE(OGLProgram);
+
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -154,8 +166,10 @@ public:
GLuint handle = 0;
};
-class OGLAssemblyProgram : private NonCopyable {
+class OGLAssemblyProgram final {
public:
+ YUZU_NON_COPYABLE(OGLAssemblyProgram);
+
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -176,8 +190,10 @@ public:
GLuint handle = 0;
};
-class OGLPipeline : private NonCopyable {
+class OGLPipeline final {
public:
+ YUZU_NON_COPYABLE(OGLPipeline);
+
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
@@ -198,8 +214,10 @@ public:
GLuint handle = 0;
};
-class OGLBuffer : private NonCopyable {
+class OGLBuffer final {
public:
+ YUZU_NON_COPYABLE(OGLBuffer);
+
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -223,8 +241,10 @@ public:
GLuint handle = 0;
};
-class OGLSync : private NonCopyable {
+class OGLSync final {
public:
+ YUZU_NON_COPYABLE(OGLSync);
+
OGLSync() = default;
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
@@ -247,8 +267,10 @@ public:
GLsync handle = 0;
};
-class OGLFramebuffer : private NonCopyable {
+class OGLFramebuffer final {
public:
+ YUZU_NON_COPYABLE(OGLFramebuffer);
+
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -272,8 +294,10 @@ public:
GLuint handle = 0;
};
-class OGLQuery : private NonCopyable {
+class OGLQuery final {
public:
+ YUZU_NON_COPYABLE(OGLQuery);
+
OGLQuery() = default;
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fd334a146..2227d9197 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
return accelerate_dma;
}
+void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) {
+ auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ if (!cpu_addr) [[unlikely]] {
+ gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ return;
+ }
+ gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ {
+ std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+ buffer_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ }
+ {
+ std::scoped_lock lock_texture{texture_cache.mutex};
+ texture_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
+ query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 866827247..5af2e275b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -99,6 +99,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+ void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 3bfdf41ba..7d9d4f7ba 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -140,12 +140,12 @@ bool VKScheduler::UpdateRescaling(bool is_rescaling) {
void VKScheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
- if (work_queue.empty()) {
- wait_cv.notify_all();
- }
std::unique_ptr<CommandChunk> work;
{
std::unique_lock lock{work_mutex};
+ if (work_queue.empty()) {
+ wait_cv.notify_all();
+ }
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 1b06c9296..e69aa136b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -146,6 +146,7 @@ private:
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+ recorded_counts++;
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
@@ -167,7 +168,7 @@ private:
}
bool Empty() const {
- return command_offset == 0;
+ return recorded_counts == 0;
}
bool HasSubmit() const {
@@ -178,6 +179,7 @@ private:
Command* first = nullptr;
Command* last = nullptr;
+ size_t recorded_counts = 0;
size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
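
Empty() now answers "were any commands recorded?" via an explicit counter rather than inferring it from the byte offset into the chunk's storage. A stripped-down sketch of just that bookkeeping (assumed shape; the real CommandChunk also placement-constructs each command into data):

// Assumed-shape sketch of the counter-based emptiness check only.
#include <cstddef>

class ChunkBookkeeping {
public:
    void OnRecord() {
        ++recorded_counts;  // bumped once per Record() call, as in the hunk above
    }
    bool Empty() const {
        return recorded_counts == 0;  // no longer derived from command_offset
    }

private:
    std::size_t recorded_counts = 0;
};
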
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 78bf90c48..87636857d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -170,7 +170,7 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.clear();
if (!removed_shaders.empty()) {
- RemoveShadersFromStorage(std::move(removed_shaders));
+ RemoveShadersFromStorage(removed_shaders);
}
}
@@ -213,7 +213,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {
rasterizer.UpdatePagesCachedCount(addr, size, -1);
}
-void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
+void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
// Remove them from the cache
std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
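
Taking the removed-shader list as std::span rather than std::vector by value lets the caller's vector bind without copying, which is also why the std::move above is dropped. A toy, self-contained illustration with stand-in types:

// Illustration only (stand-in types): std::span is a non-owning view, so the
// vector converts implicitly and nothing is copied or moved.
#include <span>
#include <vector>

struct ShaderInfoStub {};

static void RemoveStubs(std::span<ShaderInfoStub*> removed) {
    for (ShaderInfoStub* stub : removed) {
        (void)stub;  // read-only traversal of the caller's storage
    }
}

static void Example() {
    std::vector<ShaderInfoStub*> removed_shaders;
    RemoveStubs(removed_shaders);  // implicit vector -> span conversion
}
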
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 136fe294c..8836bc8c6 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,7 +4,6 @@
#pragma once
-#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
@@ -138,7 +137,7 @@ private:
/// @param removed_shaders Shaders to be removed from the storage
/// @pre invalidation_mutex is locked
/// @pre lookup_mutex is locked
- void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
+ void RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders);
/// @brief Creates a new entry in the lookup cache and returns its pointer
/// @pre lookup_mutex is locked