summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h39
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h5
-rw-r--r--src/video_core/engines/maxwell_dma.cpp7
-rw-r--r--src/video_core/fence_manager.h2
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/host1x/codecs/codec.cpp2
-rw-r--r--src/video_core/host1x/codecs/h264.cpp14
-rw-r--r--src/video_core/host1x/codecs/h264.h12
-rw-r--r--src/video_core/host1x/codecs/vp8.cpp2
-rw-r--r--src/video_core/host1x/codecs/vp8.h7
-rw-r--r--src/video_core/host1x/codecs/vp9.cpp1
-rw-r--r--src/video_core/host1x/codecs/vp9.h8
-rw-r--r--src/video_core/host1x/codecs/vp9_types.h1
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp5
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp35
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/shader_cache.cpp2
-rw-r--r--src/video_core/shader_cache.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h25
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp8
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp6
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.cpp17
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h2
30 files changed, 191 insertions, 83 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 58a45ab67..b5ed3380f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
- memory_tracker.CachedCpuWrite(cpu_addr, size);
+ const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+ if (!is_dirty) {
+ return;
+ }
+ VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
+ VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
+ if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
+ WriteMemory(cpu_addr, size);
+ return;
+ }
+
+ tmp_buffer.resize_destructive(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
+
+ InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
+}
+
+template <class P>
+bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
+ const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+ if (!is_dirty) {
+ return false;
+ }
+ if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+ return true;
+ }
+ WriteMemory(cpu_addr, size);
+ return false;
}
template <class P>
@@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
return false;
}
+ InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
+
+ return true;
+}
+
+template <class P>
+void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+ std::span<const u8> inlined_buffer) {
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval);
@@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
-
- return true;
}
template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index fe6068cfe..460fc7551 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -245,6 +245,8 @@ public:
void CachedWriteMemory(VAddr cpu_addr, u64 size);
+ bool OnCPUWrite(VAddr cpu_addr, u64 size);
+
void DownloadMemory(VAddr cpu_addr, u64 size);
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
@@ -543,6 +545,9 @@ private:
void ClearDownload(IntervalType subtract_interval);
+ void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+ std::span<const u8> inlined_buffer);
+
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a290d6ea7..f8598fd98 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -174,8 +174,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_operand.address = regs.offset_in;
DMA::BufferOperand dst_operand;
- u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
- dst_operand.pitch = abs_pitch_out;
+ dst_operand.pitch = static_cast<u32>(std::abs(regs.pitch_out));
dst_operand.width = regs.line_length_in;
dst_operand.height = regs.line_count;
dst_operand.address = regs.offset_out;
@@ -222,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
+ const size_t dst_size = dst_operand.pitch * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
@@ -231,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
- abs_pitch_out);
+ dst_operand.pitch);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 35d699bbf..ab20ff30f 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -69,7 +69,6 @@ public:
}
void SignalFence(std::function<void()>&& func) {
- rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
@@ -96,6 +95,7 @@ public:
guard.unlock();
cv.notify_all();
}
+ rasterizer.InvalidateGPUCache();
}
void SignalSyncPoint(u32 value) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index db385076d..c192e33b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,7 +95,9 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() {
- rasterizer->InvalidateGPUCache();
+ std::function<void(VAddr, size_t)> callback_writes(
+ [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
+ system.GatherGPUDirtyMemory(callback_writes);
}
/// Signal the ending of command list.
@@ -299,6 +301,10 @@ struct GPU::Impl {
gpu_thread.InvalidateRegion(addr, size);
}
+ bool OnCPUWrite(VAddr addr, u64 size) {
+ return rasterizer->OnCPUWrite(addr, size);
+ }
+
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
@@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
impl->InvalidateRegion(addr, size);
}
+bool GPU::OnCPUWrite(VAddr addr, u64 size) {
+ return impl->OnCPUWrite(addr, size);
+}
+
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
impl->FlushAndInvalidateRegion(addr, size);
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e49c40cf2..ba2838b89 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -250,6 +250,10 @@ public:
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
+ /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
+ /// sensible, false otherwise
+ bool OnCPUWrite(VAddr addr, u64 size);
+
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 889144f38..2f0f9f593 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
rasterizer->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
- rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
+ rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
} else {
ASSERT(false);
}
@@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
- rasterizer->OnCPUWrite(addr, size);
+ rasterizer->OnCacheInvalidation(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
- rasterizer->OnCPUWrite(addr, size);
+ rasterizer->OnCacheInvalidation(addr, size);
}
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index cd6a3a9b8..da07a556f 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -290,7 +290,7 @@ void Codec::Decode() {
return vp9_decoder->GetFrameBytes();
default:
ASSERT(false);
- return std::vector<u8>{};
+ return std::span<const u8>{};
}
}();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index ce827eb6c..862904e39 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -29,15 +29,15 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
H264::~H264() = default;
-const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
- bool is_first_frame) {
+std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+ bool is_first_frame) {
H264DecoderContext context;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
sizeof(H264DecoderContext));
const s64 frame_number = context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
- frame.resize(context.stream_len);
+ frame.resize_destructive(context.stream_len);
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
return frame;
}
@@ -135,14 +135,14 @@ const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegist
for (s32 index = 0; index < 6; index++) {
writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale};
- writer.WriteScalingList(matrix, index * 16, 16);
+ writer.WriteScalingList(scan, matrix, index * 16, 16);
}
if (context.h264_parameter_set.transform_8x8_mode_flag) {
for (s32 index = 0; index < 2; index++) {
writer.WriteBit(true);
std::span<const u8> matrix{context.weight_scale_8x8};
- writer.WriteScalingList(matrix, index * 64, 64);
+ writer.WriteScalingList(scan, matrix, index * 64, 64);
}
}
@@ -188,8 +188,8 @@ void H264BitWriter::WriteBit(bool state) {
WriteBits(state ? 1 : 0, 1);
}
-void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
- static Common::ScratchBuffer<u8> scan{};
+void H264BitWriter::WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list,
+ s32 start, s32 count) {
scan.resize_destructive(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index 5cc86454e..d6b556322 100644
--- a/src/video_core/host1x/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -5,9 +5,11 @@
#include <span>
#include <vector>
+
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
@@ -37,7 +39,8 @@ public:
/// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
/// Writes the scaling matrices of the sream
- void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
+ void WriteScalingList(Common::ScratchBuffer<u8>& scan, std::span<const u8> list, s32 start,
+ s32 count);
/// Return the bitstream as a vector.
[[nodiscard]] std::vector<u8>& GetByteArray();
@@ -63,11 +66,12 @@ public:
~H264();
/// Compose the H264 frame for FFmpeg decoding
- [[nodiscard]] const std::vector<u8>& ComposeFrame(
- const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
+ [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
+ bool is_first_frame = false);
private:
- std::vector<u8> frame;
+ Common::ScratchBuffer<u8> frame;
+ Common::ScratchBuffer<u8> scan;
Host1x::Host1x& host1x;
struct H264ParameterSet {
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index 28fb12cb8..ee6392ff9 100644
--- a/src/video_core/host1x/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -12,7 +12,7 @@ VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
VP8::~VP8() = default;
-const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
+std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
VP8PictureInfo info;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
index 5bf07ecab..7926b73f3 100644
--- a/src/video_core/host1x/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -4,10 +4,11 @@
#pragma once
#include <array>
-#include <vector>
+#include <span>
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "video_core/host1x/nvdec_common.h"
namespace Tegra {
@@ -24,11 +25,11 @@ public:
~VP8();
/// Compose the VP8 frame for FFmpeg decoding
- [[nodiscard]] const std::vector<u8>& ComposeFrame(
+ [[nodiscard]] std::span<const u8> ComposeFrame(
const Host1x::NvdecCommon::NvdecRegisters& state);
private:
- std::vector<u8> frame;
+ Common::ScratchBuffer<u8> frame;
Host1x::Host1x& host1x;
struct VP8PictureInfo {
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index cf40c9012..306c3d0e8 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -3,6 +3,7 @@
#include <algorithm> // for std::copy
#include <numeric>
+
#include "common/assert.h"
#include "video_core/host1x/codecs/vp9.h"
#include "video_core/host1x/host1x.h"
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
index d4083e8d3..f1ed19508 100644
--- a/src/video_core/host1x/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -4,9 +4,11 @@
#pragma once
#include <array>
+#include <span>
#include <vector>
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "common/stream.h"
#include "video_core/host1x/codecs/vp9_types.h"
#include "video_core/host1x/nvdec_common.h"
@@ -128,8 +130,8 @@ public:
return !current_frame_info.show_frame;
}
- /// Returns a const reference to the composed frame data.
- [[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
+ /// Returns a const span to the composed frame data.
+ [[nodiscard]] std::span<const u8> GetFrameBytes() const {
return frame;
}
@@ -181,7 +183,7 @@ private:
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
Host1x::Host1x& host1x;
- std::vector<u8> frame;
+ Common::ScratchBuffer<u8> frame;
std::array<s8, 4> loop_filter_ref_deltas{};
std::array<s8, 2> loop_filter_mode_deltas{};
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
index adad8ed7e..cc9b25690 100644
--- a/src/video_core/host1x/codecs/vp9_types.h
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -5,6 +5,7 @@
#include <array>
#include <vector>
+
#include "common/common_funcs.h"
#include "common/common_types.h"
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 7566a8c4e..cb8029a4f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -109,7 +109,9 @@ public:
}
/// Notify rasterizer that any caches of the specified region are desync with guest
- virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
+ virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
+
+ virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
virtual void InvalidateGPUCache() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index bf2ce4c49..92ecf6682 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
return false;
}
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
-void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
+bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
+ return false;
+}
+void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
VideoCore::RasterizerDownloadArea new_area{
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index a8d35d2c1..93b9a6971 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -53,7 +53,8 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index edf527f2d..aadd6967c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
}
}
-void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+ MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (addr == 0 || size == 0) {
+ return false;
+ }
+
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.OnCPUWrite(addr, size)) {
+ return true;
+ }
+ }
+
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+
+ shader_cache.InvalidateRegion(addr, size);
+ return false;
+}
+
+void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
- shader_cache.OnCPUWrite(addr, size);
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
@@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
+ shader_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateGPUCache() {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- shader_cache.SyncGuestHost();
- {
- std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.FlushCachedWrites();
- }
+ gpu.InvalidateGPUCache();
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
@@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- shader_cache.OnCPUWrite(addr, size);
+ shader_cache.OnCacheInvalidation(addr, size);
}
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a73ad15c1..8eda2ddba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -98,7 +98,8 @@ public:
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index b72f95235..51df18ec3 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -591,7 +591,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
.flags = 0,
.size = 4,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -599,7 +599,6 @@ void BufferCacheRuntime::ReserveNullBuffer() {
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
- create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f7c0d939a..456bb040e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
}
}
-void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+ if (addr == 0 || size == 0) {
+ return false;
+ }
+
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ if (buffer_cache.OnCPUWrite(addr, size)) {
+ return true;
+ }
+ }
+
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.WriteMemory(addr, size);
+ }
+
+ pipeline_cache.InvalidateRegion(addr, size);
+ return false;
+}
+
+void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
- pipeline_cache.OnCPUWrite(addr, size);
+
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
@@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.CachedWriteMemory(addr, size);
}
+ pipeline_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::InvalidateGPUCache() {
- pipeline_cache.SyncGuestHost();
- {
- std::scoped_lock lock{buffer_cache.mutex};
- buffer_cache.FlushCachedWrites();
- }
+ gpu.InvalidateGPUCache();
}
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
@@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
- pipeline_cache.OnCPUWrite(addr, size);
+ pipeline_cache.OnCacheInvalidation(addr, size);
}
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b39710b3c..73257d964 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,7 +96,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
- void OnCPUWrite(VAddr addr, u64 size) override;
+ void OnCacheInvalidation(VAddr addr, u64 size) override;
+ bool OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 4db948b6d..01701201d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
RemovePendingShaders();
}
-void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f3cc4c70b..de8e08002 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -62,7 +62,7 @@ public:
/// @brief Unmarks a memory region as cached and marks it for removal
/// @param addr Start address of the CPU write operation
/// @param size Number of bytes of the CPU write operation
- void OnCPUWrite(VAddr addr, size_t size);
+ void OnCacheInvalidation(VAddr addr, size_t size);
/// @brief Flushes delayed removal operations
void SyncGuestHost();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d3f03a995..79f158db4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -598,14 +598,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- image.flags |= ImageFlagBits::CpuModified;
- if (True(image.flags & ImageFlagBits::Tracked)) {
- UntrackImage(image, id);
- }
- /*
if (True(image.flags & ImageFlagBits::Remapped)) {
continue;
}
@@ -613,7 +605,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id);
}
- */
}
}
@@ -874,11 +865,19 @@ void TextureCache<P>::PopAsyncFlushes() {
template <class P>
ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) {
const ImageInfo dst_info(operand);
- const ImageId image_id = FindDMAImage(dst_info, operand.address);
- if (!image_id) {
+ const ImageId dst_id = FindDMAImage(dst_info, operand.address);
+ if (!dst_id) {
+ return NULL_IMAGE_ID;
+ }
+ auto& image = slot_images[dst_id];
+ if (False(image.flags & ImageFlagBits::GpuModified)) {
+ // No need to waste time on an image that's synced with guest
+ return NULL_IMAGE_ID;
+ }
+ if (image.info.type == ImageType::e3D) {
+ // Don't accelerate 3D images.
return NULL_IMAGE_ID;
}
- auto& image = slot_images[image_id];
if (!is_upload && !image.info.dma_downloaded) {
// Force a full sync.
image.info.dma_downloaded = true;
@@ -888,7 +887,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
if (!base) {
return NULL_IMAGE_ID;
}
- return image_id;
+ return dst_id;
}
template <class P>
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 70436cf1c..421e71e5a 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -528,6 +528,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
sets_per_pool = 64;
+ if (extensions.extended_dynamic_state3 && is_amd_driver &&
+ properties.properties.driverVersion >= VK_MAKE_API_VERSION(0, 2, 0, 270)) {
+ LOG_WARNING(Render_Vulkan,
+ "AMD drivers after 23.5.2 have broken extendedDynamicState3ColorBlendEquation");
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+ dynamic_state3_blending = false;
+ }
if (is_amd_driver) {
// AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
sets_per_pool = 96;
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 7624a9b32..6a294c1da 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -19,11 +19,9 @@
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
-#elif defined(__APPLE__)
-#include <vulkan/vulkan_macos.h>
#elif defined(__ANDROID__)
#include <vulkan/vulkan_android.h>
-#else
+#elif !defined(__APPLE__)
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
@@ -68,7 +66,7 @@ namespace {
break;
#elif defined(__APPLE__)
case Core::Frontend::WindowSystemType::Cocoa:
- extensions.push_back(VK_MVK_MACOS_SURFACE_EXTENSION_NAME);
+ extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME);
break;
#elif defined(__ANDROID__)
case Core::Frontend::WindowSystemType::Android:
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
index c34599365..cfea4cd7b 100644
--- a/src/video_core/vulkan_common/vulkan_surface.cpp
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -11,11 +11,9 @@
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
-#elif defined(__APPLE__)
-#include <vulkan/vulkan_macos.h>
#elif defined(__ANDROID__)
#include <vulkan/vulkan_android.h>
-#else
+#elif !defined(__APPLE__)
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
@@ -44,12 +42,13 @@ vk::SurfaceKHR CreateSurface(
}
#elif defined(__APPLE__)
if (window_info.type == Core::Frontend::WindowSystemType::Cocoa) {
- const VkMacOSSurfaceCreateInfoMVK mvk_ci{VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK,
- nullptr, 0, window_info.render_surface};
- const auto vkCreateMacOSSurfaceMVK = reinterpret_cast<PFN_vkCreateMacOSSurfaceMVK>(
- dld.vkGetInstanceProcAddr(*instance, "vkCreateMacOSSurfaceMVK"));
- if (!vkCreateMacOSSurfaceMVK ||
- vkCreateMacOSSurfaceMVK(*instance, &mvk_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+ const VkMetalSurfaceCreateInfoEXT macos_ci = {
+ .pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface),
+ };
+ const auto vkCreateMetalSurfaceEXT = reinterpret_cast<PFN_vkCreateMetalSurfaceEXT>(
+ dld.vkGetInstanceProcAddr(*instance, "vkCreateMetalSurfaceEXT"));
+ if (!vkCreateMetalSurfaceEXT ||
+ vkCreateMetalSurfaceEXT(*instance, &macos_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Metal surface");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index b5e70fcd4..32bd75ad8 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -15,6 +15,8 @@
#define VK_NO_PROTOTYPES
#ifdef _WIN32
#define VK_USE_PLATFORM_WIN32_KHR
+#elif defined(__APPLE__)
+#define VK_USE_PLATFORM_METAL_EXT
#endif
#include <vulkan/vulkan.h>