summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h49
-rw-r--r--src/video_core/engines/engine_upload.cpp7
-rw-r--r--src/video_core/engines/engine_upload.h8
-rw-r--r--src/video_core/engines/kepler_compute.cpp1
-rw-r--r--src/video_core/engines/kepler_memory.cpp4
-rw-r--r--src/video_core/engines/kepler_memory.h7
-rw-r--r--src/video_core/engines/maxwell_3d.cpp3
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/gpu.cpp8
-rw-r--r--src/video_core/gpu.h16
-rw-r--r--src/video_core/macro/macro.cpp13
-rw-r--r--src/video_core/macro/macro.h2
-rw-r--r--src/video_core/macro/macro_hle.cpp34
-rw-r--r--src/video_core/macro/macro_hle.h21
-rw-r--r--src/video_core/macro/macro_interpreter.cpp92
-rw-r--r--src/video_core/macro/macro_interpreter.h78
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp104
-rw-r--r--src/video_core/macro/macro_jit_x64.h71
-rw-r--r--src/video_core/memory_manager.h4
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h2
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/shader_cache.h3
27 files changed, 343 insertions, 242 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 43bed63ac..fa26eb8b0 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -131,6 +131,8 @@ public:
void DownloadMemory(VAddr cpu_addr, u64 size);
+ bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
void DisableGraphicsUniformBuffer(size_t stage, u32 index);
@@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
+ const bool is_accuracy_normal =
+ Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
@@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
buffer.ForEachDownloadRangeAndClear(
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ if (is_accuracy_normal) {
+ return;
+ }
const VAddr buffer_addr = buffer.CpuAddr();
const auto add_download = [&](VAddr start, VAddr end) {
const u64 new_offset = start - buffer_addr;
@@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
const IntervalType base_interval{cpu_addr, cpu_addr + size};
common_ranges.add(base_interval);
- const bool is_accuracy_high =
- Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
- if (!is_async && !is_accuracy_high) {
+ if (!is_async) {
return;
}
uncommitted_ranges.add(base_interval);
@@ -1474,6 +1479,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
+ begin -= PAGE_SIZE * 256;
+ cpu_addr = begin;
end += PAGE_SIZE * 256;
}
}
@@ -1642,6 +1649,42 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
}
template <class P>
+bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
+ std::span<u8> inlined_buffer) {
+ const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
+ if (!is_dirty) {
+ return false;
+ }
+ if (!IsRegionGpuModified(dest_address, copy_size)) {
+ return false;
+ }
+
+ const IntervalType subtract_interval{dest_address, dest_address + copy_size};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
+
+ BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
+ auto& buffer = slot_buffers[buffer_id];
+ SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
+
+ if constexpr (USE_MEMORY_MAPS) {
+ std::array copies{BufferCopy{
+ .src_offset = 0,
+ .dst_offset = buffer.Offset(dest_address),
+ .size = copy_size,
+ }};
+ auto upload_staging = runtime.UploadStagingBuffer(copy_size);
+ u8* const src_pointer = upload_staging.mapped_span.data();
+ std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+ } else {
+ buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
+ }
+
+ return true;
+}
+
+template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
}
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 71d7e1473..351b110fe 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
@@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_)
State::~State() = default;
+void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
void State::ProcessExec(const bool is_linear_) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
@@ -32,7 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
}
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
- memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+ rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 1c7f1effa..c9c5ec8c3 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -12,6 +12,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines::Upload {
struct Registers {
@@ -60,6 +64,9 @@ public:
void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
private:
u32 write_offset = 0;
u32 copy_size = 0;
@@ -68,6 +75,7 @@ private:
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
};
} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 492b4c5a3..5a1c12076 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default;
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
+ upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 560551157..8aed16caa 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
KeplerMemory::~KeplerMemory() = default;
+void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ upload_state.BindRasterizer(rasterizer_);
+}
+
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerMemory register, increase the size of the Regs structure");
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 0d8ea09a9..949e2fae1 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -22,6 +22,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
/**
@@ -38,6 +42,9 @@ public:
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
~KeplerMemory() override;
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b18b8a02a..5d6d217bb 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default;
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
+ upload_state.BindRasterizer(rasterizer_);
}
void Maxwell3D::InitializeRegisterDefaults() {
@@ -240,7 +241,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
- macro_engine->Execute(*this, macro_positions[entry], parameters);
+ macro_engine->Execute(macro_positions[entry], parameters);
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
FlushMMEInlineDraw();
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f22342dfb..dc9df6c8b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1557,7 +1557,8 @@ private:
static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct CBDataState {
- std::array<std::array<u32, 0x4000>, 16> buffer;
+ static constexpr size_t inline_size = 0x4000;
+ std::array<std::array<u32, inline_size>, 16> buffer;
u32 current{null_cb_data};
u32 id{null_cb_data};
u32 start_pos{};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 705765c99..ba9ba082f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -59,6 +59,7 @@ struct GPU::Impl {
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
+ kepler_memory->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
}
@@ -502,8 +503,13 @@ struct GPU::Impl {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
+ break;
case BufferMethods::UnkCacheFlush:
+ rasterizer->SyncGuestHost();
+ break;
case BufferMethods::WrcacheFlush:
+ rasterizer->SignalReference();
+ break;
case BufferMethods::FenceValue:
break;
case BufferMethods::RefCnt:
@@ -513,7 +519,7 @@ struct GPU::Impl {
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
- ProcessWaitForInterruptMethod();
+ rasterizer->WaitForIdle();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 3188b83ed..26b8ea233 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -12,9 +12,6 @@
#include "video_core/framebuffer_config.h"
namespace Core {
-namespace Frontend {
-class EmuWindow;
-}
class System;
} // namespace Core
@@ -25,7 +22,6 @@ class ShaderNotify;
namespace Tegra {
class DmaPusher;
-class CDmaPusher;
struct CommandList;
enum class RenderTargetFormat : u32 {
@@ -88,15 +84,9 @@ enum class DepthFormat : u32 {
D32_FLOAT_S8X24_UINT = 0x19,
};
-struct CommandListHeader;
-class DebugContext;
-
namespace Engines {
-class Fermi2D;
class Maxwell3D;
-class MaxwellDMA;
class KeplerCompute;
-class KeplerMemory;
} // namespace Engines
enum class EngineID {
@@ -190,12 +180,6 @@ public:
/// Returns a const reference to the GPU DMA pusher.
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;
- /// Returns a reference to the GPU CDMA pusher.
- [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();
-
- /// Returns a const reference to the GPU CDMA pusher.
- [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
-
/// Returns a reference to the underlying renderer.
[[nodiscard]] VideoCore::RendererBase& Renderer();
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index d7fabe605..0aeda4ce8 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,12 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
#include <optional>
+
#include <boost/container_hash/hash.hpp>
+
#include "common/assert.h"
-#include "common/logging/log.h"
#include "common/settings.h"
-#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
#include "video_core/macro/macro_interpreter.h"
@@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) {
uploaded_macro_code[method].push_back(data);
}
-void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
- const std::vector<u32>& parameters) {
+void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
auto compiled_macro = macro_cache.find(method);
if (compiled_macro != macro_cache.end()) {
const auto& cache_info = compiled_macro->second;
@@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
cache_info.lle_program = Compile(code);
}
- auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
- if (hle_program.has_value()) {
+ if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
cache_info.has_hle_program = true;
- cache_info.hle_program = std::move(hle_program.value());
+ cache_info.hle_program = std::move(hle_program);
cache_info.hle_program->Execute(parameters, method);
} else {
cache_info.lle_program->Execute(parameters, method);
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 31ee3440a..7aaa49286 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -119,7 +119,7 @@ public:
void AddCode(u32 method, u32 data);
// Compiles the macro if its not in the cache, and executes the compiled macro
- void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
+ void Execute(u32 method, const std::vector<u32>& parameters);
protected:
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 70ac7c620..900ad23c9 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -5,12 +5,15 @@
#include <array>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra {
-
namespace {
+
+using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
+
// HLE'd functions
void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
@@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.CallMethodFromMME(0x8e5, 0x0);
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-} // Anonymous namespace
constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
@@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
{0x0217920100488FF7, &HLE_0217920100488FF7},
}};
+class HLEMacroImpl final : public CachedMacro {
+public:
+ explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
+ : maxwell3d{maxwell3d_}, func{func_} {}
+
+ void Execute(const std::vector<u32>& parameters, u32 method) override {
+ func(maxwell3d, parameters);
+ }
+
+private:
+ Engines::Maxwell3D& maxwell3d;
+ HLEFunction func;
+};
+} // Anonymous namespace
+
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
HLEMacro::~HLEMacro() = default;
-std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
+std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
[hash](const auto& pair) { return pair.first == hash; });
if (it == hle_funcs.end()) {
- return std::nullopt;
+ return nullptr;
}
return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
}
-HLEMacroImpl::~HLEMacroImpl() = default;
-
-HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
- : maxwell3d{maxwell3d_}, func{func_} {}
-
-void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
- func(maxwell3d, parameters);
-}
-
} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index cb3bd1600..b86ba84a1 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -5,10 +5,7 @@
#pragma once
#include <memory>
-#include <optional>
-#include <vector>
#include "common/common_types.h"
-#include "video_core/macro/macro.h"
namespace Tegra {
@@ -16,29 +13,17 @@ namespace Engines {
class Maxwell3D;
}
-using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
-
class HLEMacro {
public:
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
~HLEMacro();
- std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
-
-private:
- Engines::Maxwell3D& maxwell3d;
-};
-
-class HLEMacroImpl : public CachedMacro {
-public:
- explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
- ~HLEMacroImpl();
-
- void Execute(const std::vector<u32>& parameters, u32 method) override;
+ // Allocates and returns a cached macro if the hash matches a known function.
+ // Returns nullptr otherwise.
+ [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
private:
Engines::Maxwell3D& maxwell3d;
- HLEFunction func;
};
} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 8da26fd59..fba755448 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -2,6 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
+#include <optional>
+
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -11,16 +14,81 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
- : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
+namespace {
+class MacroInterpreterImpl final : public CachedMacro {
+public:
+ explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
+ : maxwell3d{maxwell3d_}, code{code_} {}
-std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
- return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
-}
+ void Execute(const std::vector<u32>& params, u32 method) override;
+
+private:
+ /// Resets the execution engine state, zeroing registers, etc.
+ void Reset();
+
+ /**
+ * Executes a single macro instruction located at the current program counter. Returns whether
+ * the interpreter should keep running.
+ *
+ * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
+ * previous instruction.
+ */
+ bool Step(bool is_delay_slot);
+
+ /// Calculates the result of an ALU operation. src_a OP src_b;
+ u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
+
+ /// Performs the result operation on the input result and stores it in the specified register
+ /// (if necessary).
+ void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
+
+ /// Evaluates the branch condition and returns whether the branch should be taken or not.
+ bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
+
+ /// Reads an opcode at the current program counter location.
+ Macro::Opcode GetOpcode() const;
+
+ /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
+ u32 GetRegister(u32 register_id) const;
+
+ /// Sets the register to the input value.
+ void SetRegister(u32 register_id, u32 value);
+
+ /// Sets the method address to use for the next Send instruction.
+ void SetMethodAddress(u32 address);
-MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
- const std::vector<u32>& code_)
- : maxwell3d{maxwell3d_}, code{code_} {}
+ /// Calls a GPU Engine method with the input parameter.
+ void Send(u32 value);
+
+ /// Reads a GPU register located at the method address.
+ u32 Read(u32 method) const;
+
+ /// Returns the next parameter in the parameter queue.
+ u32 FetchParameter();
+
+ Engines::Maxwell3D& maxwell3d;
+
+ /// Current program counter
+ u32 pc{};
+ /// Program counter to execute at after the delay slot is executed.
+ std::optional<u32> delayed_pc;
+
+ /// General purpose macro registers.
+ std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
+
+ /// Method address to use for the next Send instruction.
+ Macro::MethodAddress method_address = {};
+
+ /// Input parameters of the current macro.
+ std::unique_ptr<u32[]> parameters;
+ std::size_t num_parameters = 0;
+ std::size_t parameters_capacity = 0;
+ /// Index of the next parameter that will be fetched by the 'parm' instruction.
+ u32 next_parameter_index = 0;
+
+ bool carry_flag = false;
+ const std::vector<u32>& code;
+};
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
MICROPROFILE_SCOPE(MacroInterp);
@@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() {
ASSERT(next_parameter_index < num_parameters);
return parameters[next_parameter_index++];
}
+} // Anonymous namespace
+
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
+ : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
+
+std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
+ return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
+}
} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index d50c619ce..8a9648e46 100644
--- a/src/video_core/macro/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -3,10 +3,9 @@
// Refer to the license.txt file included.
#pragma once
-#include <array>
-#include <optional>
+
#include <vector>
-#include "common/bit_field.h"
+
#include "common/common_types.h"
#include "video_core/macro/macro.h"
@@ -26,77 +25,4 @@ private:
Engines::Maxwell3D& maxwell3d;
};
-class MacroInterpreterImpl : public CachedMacro {
-public:
- explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
- void Execute(const std::vector<u32>& params, u32 method) override;
-
-private:
- /// Resets the execution engine state, zeroing registers, etc.
- void Reset();
-
- /**
- * Executes a single macro instruction located at the current program counter. Returns whether
- * the interpreter should keep running.
- *
- * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
- * previous instruction.
- */
- bool Step(bool is_delay_slot);
-
- /// Calculates the result of an ALU operation. src_a OP src_b;
- u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
-
- /// Performs the result operation on the input result and stores it in the specified register
- /// (if necessary).
- void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
-
- /// Evaluates the branch condition and returns whether the branch should be taken or not.
- bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
-
- /// Reads an opcode at the current program counter location.
- Macro::Opcode GetOpcode() const;
-
- /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
- u32 GetRegister(u32 register_id) const;
-
- /// Sets the register to the input value.
- void SetRegister(u32 register_id, u32 value);
-
- /// Sets the method address to use for the next Send instruction.
- void SetMethodAddress(u32 address);
-
- /// Calls a GPU Engine method with the input parameter.
- void Send(u32 value);
-
- /// Reads a GPU register located at the method address.
- u32 Read(u32 method) const;
-
- /// Returns the next parameter in the parameter queue.
- u32 FetchParameter();
-
- Engines::Maxwell3D& maxwell3d;
-
- /// Current program counter
- u32 pc;
- /// Program counter to execute at after the delay slot is executed.
- std::optional<u32> delayed_pc;
-
- /// General purpose macro registers.
- std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
-
- /// Method address to use for the next Send instruction.
- Macro::MethodAddress method_address = {};
-
- /// Input parameters of the current macro.
- std::unique_ptr<u32[]> parameters;
- std::size_t num_parameters = 0;
- std::size_t parameters_capacity = 0;
- /// Index of the next parameter that will be fetched by the 'parm' instruction.
- u32 next_parameter_index = 0;
-
- bool carry_flag = false;
- const std::vector<u32>& code;
-};
-
} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index c6b2b2109..47b28ad16 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -2,9 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
+#include <bitset>
+#include <optional>
+
+#include <xbyak/xbyak.h>
+
#include "common/assert.h"
+#include "common/bit_field.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
+#include "common/x64/xbyak_abi.h"
#include "common/x64/xbyak_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro_interpreter.h"
@@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
+namespace {
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
-static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
+constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
RESULT,
PARAMETERS,
@@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
BRANCH_HOLDER,
});
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
- : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
+// Arbitrarily chosen based on current booting games.
+constexpr size_t MAX_CODE_SIZE = 0x10000;
-std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
- return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
+std::bitset<32> PersistentCallerSavedRegs() {
+ return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
}
-MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
- : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
- Compile();
-}
+class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
+public:
+ explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
+ : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
+ Compile();
+ }
+
+ void Execute(const std::vector<u32>& parameters, u32 method) override;
+
+ void Compile_ALU(Macro::Opcode opcode);
+ void Compile_AddImmediate(Macro::Opcode opcode);
+ void Compile_ExtractInsert(Macro::Opcode opcode);
+ void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
+ void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
+ void Compile_Read(Macro::Opcode opcode);
+ void Compile_Branch(Macro::Opcode opcode);
+
+private:
+ void Optimizer_ScanFlags();
+
+ void Compile();
+ bool Compile_NextInstruction();
+
+ Xbyak::Reg32 Compile_FetchParameter();
+ Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
+
+ void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
+ void Compile_Send(Xbyak::Reg32 value);
-MacroJITx64Impl::~MacroJITx64Impl() = default;
+ Macro::Opcode GetOpCode() const;
+
+ struct JITState {
+ Engines::Maxwell3D* maxwell3d{};
+ std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
+ u32 carry_flag{};
+ };
+ static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
+ using ProgramType = void (*)(JITState*, const u32*);
+
+ struct OptimizerState {
+ bool can_skip_carry{};
+ bool has_delayed_pc{};
+ bool zero_reg_skip{};
+ bool skip_dummy_addimmediate{};
+ bool optimize_for_method_move{};
+ bool enable_asserts{};
+ };
+ OptimizerState optimizer{};
+
+ std::optional<Macro::Opcode> next_opcode{};
+ ProgramType program{nullptr};
+
+ std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
+ std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
+ Xbyak::Label end_of_code{};
+
+ bool is_delay_slot{};
+ u32 pc{};
+
+ const std::vector<u32>& code;
+ Engines::Maxwell3D& maxwell3d;
+};
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
MICROPROFILE_SCOPE(MacroJitExecute);
@@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
-static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
maxwell3d->CallMethodFromMME(method_address.address, value);
}
-void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
+void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
@@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
L(dont_process);
}
-void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
+void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
const s32 jump_address =
static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
@@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
L(end);
}
-void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
+void MacroJITx64Impl::Optimizer_ScanFlags() {
optimizer.can_skip_carry = true;
optimizer.has_delayed_pc = false;
for (auto raw_op : code) {
@@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
return true;
}
-Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
+Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
mov(eax, dword[PARAMETERS]);
add(PARAMETERS, sizeof(u32));
return eax;
@@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const {
ASSERT(pc < code.size());
return {code[pc]};
}
+} // Anonymous namespace
-std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
- return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
-}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
+ : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
+std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
+ return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
+}
} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index d03d480b4..773b037ae 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -4,12 +4,7 @@
#pragma once
-#include <array>
-#include <bitset>
-#include <xbyak/xbyak.h>
-#include "common/bit_field.h"
#include "common/common_types.h"
-#include "common/x64/xbyak_abi.h"
#include "video_core/macro/macro.h"
namespace Tegra {
@@ -18,9 +13,6 @@ namespace Engines {
class Maxwell3D;
}
-/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
-constexpr size_t MAX_CODE_SIZE = 0x10000;
-
class MacroJITx64 final : public MacroEngine {
public:
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
@@ -32,67 +24,4 @@ private:
Engines::Maxwell3D& maxwell3d;
};
-class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
-public:
- explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
- ~MacroJITx64Impl();
-
- void Execute(const std::vector<u32>& parameters, u32 method) override;
-
- void Compile_ALU(Macro::Opcode opcode);
- void Compile_AddImmediate(Macro::Opcode opcode);
- void Compile_ExtractInsert(Macro::Opcode opcode);
- void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
- void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
- void Compile_Read(Macro::Opcode opcode);
- void Compile_Branch(Macro::Opcode opcode);
-
-private:
- void Optimizer_ScanFlags();
-
- void Compile();
- bool Compile_NextInstruction();
-
- Xbyak::Reg32 Compile_FetchParameter();
- Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-
- void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
- void Compile_Send(Xbyak::Reg32 value);
-
- Macro::Opcode GetOpCode() const;
- std::bitset<32> PersistentCallerSavedRegs() const;
-
- struct JITState {
- Engines::Maxwell3D* maxwell3d{};
- std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
- u32 carry_flag{};
- };
- static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
- using ProgramType = void (*)(JITState*, const u32*);
-
- struct OptimizerState {
- bool can_skip_carry{};
- bool has_delayed_pc{};
- bool zero_reg_skip{};
- bool skip_dummy_addimmediate{};
- bool optimize_for_method_move{};
- bool enable_asserts{};
- };
- OptimizerState optimizer{};
-
- std::optional<Macro::Opcode> next_opcode{};
- ProgramType program{nullptr};
-
- std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
- std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
- Xbyak::Label end_of_code{};
-
- bool is_delay_slot{};
- u32 pc{};
- std::optional<u32> delayed_pc;
-
- const std::vector<u32>& code;
- Engines::Maxwell3D& maxwell3d;
-};
-
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 38d8d9d74..61bfe47c7 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -143,6 +143,8 @@ public:
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
+ void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+
private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
@@ -153,8 +155,6 @@ private:
void TryLockPage(PageEntry page_entry, std::size_t size);
void TryUnlockPage(PageEntry page_entry, std::size_t size);
- void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
-
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
bool is_safe) const;
void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b094fc064..1f1f12291 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -123,6 +123,9 @@ public:
[[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
+ virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) = 0;
+
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9b516c64f..142412a8e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
return accelerate_dma;
}
+void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) {
+ auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ if (!cpu_addr) [[unlikely]] {
+ gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ return;
+ }
+ gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ {
+ std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+ buffer_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ }
+ {
+ std::scoped_lock lock_texture{texture_cache.mutex};
+ texture_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ shader_cache.InvalidateRegion(*cpu_addr, copy_size);
+ query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d0397b745..98f6fd342 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -106,6 +106,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+ void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 73629d229..b630090e8 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -214,7 +214,7 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
{
VkImageMemoryBarrier fsr_write_barrier = base_barrier;
- fsr_write_barrier.image = *images[image_index],
+ fsr_write_barrier.image = *images[image_index];
fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fd334a146..2227d9197 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
return accelerate_dma;
}
+void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) {
+ auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ if (!cpu_addr) [[unlikely]] {
+ gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ return;
+ }
+ gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ {
+ std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+ if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+ buffer_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ }
+ {
+ std::scoped_lock lock_texture{texture_cache.mutex};
+ texture_cache.WriteMemory(*cpu_addr, copy_size);
+ }
+ pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
+ query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 866827247..5af2e275b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -99,6 +99,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+ void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+ std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 78bf90c48..87636857d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -170,7 +170,7 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.clear();
if (!removed_shaders.empty()) {
- RemoveShadersFromStorage(std::move(removed_shaders));
+ RemoveShadersFromStorage(removed_shaders);
}
}
@@ -213,7 +213,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {
rasterizer.UpdatePagesCachedCount(addr, size, -1);
}
-void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
+void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
// Remove them from the cache
std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 136fe294c..8836bc8c6 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,7 +4,6 @@
#pragma once
-#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
@@ -138,7 +137,7 @@ private:
/// @param removed_shaders Shaders to be removed from the storage
/// @pre invalidation_mutex is locked
/// @pre lookup_mutex is locked
- void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
+ void RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders);
/// @brief Creates a new entry in the lookup cache and returns its pointer
/// @pre lookup_mutex is locked