diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 3 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 15 | ||||
-rw-r--r-- | src/video_core/gpu.h | 6 | ||||
-rw-r--r-- | src/video_core/gpu_asynch.cpp | 4 | ||||
-rw-r--r-- | src/video_core/gpu_asynch.h | 1 | ||||
-rw-r--r-- | src/video_core/gpu_synch.h | 1 | ||||
-rw-r--r-- | src/video_core/gpu_thread.cpp | 19 | ||||
-rw-r--r-- | src/video_core/gpu_thread.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 64 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.h | 4 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 2 | ||||
-rw-r--r-- | src/video_core/surface.cpp | 1 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 139 |
17 files changed, 158 insertions, 123 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7802fd808..c5ec7d9f1 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -101,7 +101,8 @@ void Maxwell3D::InitializeRegisterDefaults() { #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const u32 start, const u32 range, const u8 position) { + const auto set_block = [this](const std::size_t start, const std::size_t range, + const u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); + block.timestamp = system.CoreTiming().GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,12 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; @@ -121,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); @@ -128,7 +126,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..cbcf81414 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include <cstddef> #include <map> #include <memory> +#include <mutex> #include <optional> #include <tuple> #include <utility> @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map<u64, int>; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6a610a3bc..a3524a6a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1148,7 +1148,7 @@ private: for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); - } else if (std::get_if<TextureAoffi>(&variant)) { + } else if (std::holds_alternative<TextureAoffi>(variant)) { expr += GenerateTextureAoffi(meta->aoffi); } else { UNREACHABLE(); @@ -1158,8 +1158,8 @@ private: return expr + ')'; } - std::string GenerateTextureArgument(TextureArgument argument) { - const auto [type, operand] = argument; + std::string GenerateTextureArgument(const TextureArgument& argument) { + const auto& [type, operand] = argument; if (operand == nullptr) { return {}; } @@ -1235,7 +1235,7 @@ private: std::string BuildImageValues(Operation operation) { constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; const std::size_t values_count{meta.values.size()}; std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); @@ -1780,14 +1780,14 @@ private: return {"0", Type::Int}; } - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), Type::Uint}; } Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), BuildIntegerCoordinates(operation), BuildImageValues(operation)); return {}; @@ -1795,7 +1795,7 @@ private: template <const std::string_view& opname> Expression AtomicImage(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; ASSERT(meta.values.size() == 1); return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), @@ -2246,7 +2246,7 @@ private: code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); } - std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { + std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { return fmt::format("{}_{}_{}", name, index, suffix); } @@ -2271,17 +2271,15 @@ private: ShaderWriter code; }; -static constexpr std::string_view flow_var = "flow_var_"; - std::string GetFlowVariable(u32 i) { - return fmt::format("{}{}", flow_var, i); + return fmt::format("flow_var_{}", i); } class ExprDecompiler { public: explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ExprAnd& expr) { + void operator()(const ExprAnd& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " && "; @@ -2289,7 +2287,7 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprOr& expr) { + void operator()(const ExprOr& expr) { inner += "( "; std::visit(*this, *expr.operand1); inner += " || "; @@ -2297,17 +2295,17 @@ public: inner += ')'; } - void operator()(VideoCommon::Shader::ExprNot& expr) { + void operator()(const ExprNot& expr) { inner += '!'; std::visit(*this, *expr.operand1); } - void operator()(VideoCommon::Shader::ExprPredicate& expr) { + void operator()(const ExprPredicate& expr) { const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); inner += decomp.GetPredicate(pred); } - void operator()(VideoCommon::Shader::ExprCondCode& expr) { + void operator()(const ExprCondCode& expr) { const Node cc = decomp.ir.GetConditionCode(expr.cc); std::string target; @@ -2329,15 +2327,15 @@ public: inner += target; } - void operator()(VideoCommon::Shader::ExprVar& expr) { + void operator()(const ExprVar& expr) { inner += GetFlowVariable(expr.var_index); } - void operator()(VideoCommon::Shader::ExprBoolean& expr) { + void operator()(const ExprBoolean& expr) { inner += expr.value ? "true" : "false"; } - std::string& GetResult() { + const std::string& GetResult() const { return inner; } @@ -2350,7 +2348,7 @@ class ASTDecompiler { public: explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} - void operator()(VideoCommon::Shader::ASTProgram& ast) { + void operator()(const ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -2358,7 +2356,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTIfThen& ast) { + void operator()(const ASTIfThen& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); @@ -2372,7 +2370,7 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTIfElse& ast) { + void operator()(const ASTIfElse& ast) { decomp.code.AddLine("else {{"); decomp.code.scope++; ASTNode current = ast.nodes.GetFirst(); @@ -2384,29 +2382,29 @@ public: decomp.code.AddLine("}}"); } - void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { + void operator()(const ASTBlockDecoded& ast) { decomp.VisitBlock(ast.nodes); } - void operator()(VideoCommon::Shader::ASTVarSet& ast) { + void operator()(const ASTVarSet& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTLabel& ast) { + void operator()(const ASTLabel& ast) { decomp.code.AddLine("// Label_{}:", ast.index); } - void operator()(VideoCommon::Shader::ASTGoto& ast) { + void operator()([[maybe_unused]] const ASTGoto& ast) { UNREACHABLE(); } - void operator()(VideoCommon::Shader::ASTDoWhile& ast) { + void operator()(const ASTDoWhile& ast) { ExprDecompiler expr_parser{decomp}; std::visit(expr_parser, *ast.condition); decomp.code.AddLine("do {{"); @@ -2420,7 +2418,7 @@ public: decomp.code.AddLine("}} while({});", expr_parser.GetResult()); } - void operator()(VideoCommon::Shader::ASTReturn& ast) { + void operator()(const ASTReturn& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2440,7 +2438,7 @@ public: } } - void operator()(VideoCommon::Shader::ASTBreak& ast) { + void operator()(const ASTBreak& ast) { const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); if (!is_true) { ExprDecompiler expr_parser{decomp}; @@ -2455,7 +2453,7 @@ public: } } - void Visit(VideoCommon::Shader::ASTNode& node) { + void Visit(const ASTNode& node) { std::visit(*this, *node->GetInnerData()); } @@ -2468,9 +2466,9 @@ void GLSLDecompiler::DecompileAST() { for (u32 i = 0; i < num_flow_variables; i++) { code.AddLine("bool {} = false;", GetFlowVariable(i)); } + ASTDecompiler decompiler{*this}; - VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); - decompiler.Visit(program); + decompiler.Visit(ir.GetASTProgram()); } } // Anonymous namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 268d1aed0..9d21f45de 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -473,8 +473,8 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address, const CompilerSettings& settings) { auto result_out = std::make_unique<ShaderCharacteristics>(); if (settings.depth == CompileDepth::BruteForce) { diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 74e54a5c7..37e987d62 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -76,8 +76,8 @@ struct ShaderCharacteristics { CompilerSettings settings{}; }; -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address, +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address, const CompilerSettings& settings); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..447fb5c1d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -410,7 +410,7 @@ public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, meta, std::vector<Node>{}) {} + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} explicit OperationNode(OperationCode code, std::vector<Node> operands) : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..ef6b3592e 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -252,6 +252,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, default: break; } + break; case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ca2da8f97..6a92b22d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -62,10 +62,10 @@ public: } } - /*** - * `Guard` guarantees that rendertargets don't unregister themselves if the + /** + * Guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. - ***/ + */ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -287,7 +287,7 @@ protected: const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a sepparate buffer. + // and reading it from a separate buffer. virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { @@ -386,12 +386,13 @@ private: }; /** - * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Takes care of selecting a proper strategy to deal with a texture recycle. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param untopological Indicates to the recycler that the texture has no way + * to match the overlaps due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { @@ -402,7 +403,7 @@ private: if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } - for (auto s : overlaps) { + for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -419,16 +420,19 @@ private: } /** - * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in - *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the - *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the - *new surface from that data. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented + * strategies: Ignore and Flush. + * + * - Ignore: Just unregisters all the overlaps and loads the new texture. + * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters for the new surface. + * @param gpu_addr The starting address of the new surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or left + * blank. + * @param untopological Indicates to the recycler that the texture has no way to match the + * overlaps due to topological reasons. **/ std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, @@ -465,10 +469,12 @@ private: } /** - * `RebuildSurface` this method takes a single surface and recreates into another that - * may differ in format, target or width alingment. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we'll use to recreate the surface. + * Takes a single surface and recreates into another that may differ in + * format, target or width alignment. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we'll use to recreate the surface. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -502,12 +508,14 @@ private: } /** - * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's - * params if it's an exact match, we return the main view of the registered surface. If it's - * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * Takes a single surface and checks with the new surface's params if it's an exact + * match, we return the main view of the registered surface. If its formats don't + * match, we rebuild the surface. We call this last method a `Mirage`. If formats * match but the targets don't, we create an overview View of the registered surface. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we want to check. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we want to check. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -529,13 +537,14 @@ private: } /** - * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface - * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps - * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface - * for them, else we return nothing. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. + * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate + * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps + * of the new surface, if they all match we end up recreating a surface for them, + * else we return nothing. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. **/ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -575,7 +584,7 @@ private: } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } - for (auto surface : overlaps) { + for (const auto& surface : overlaps) { Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); @@ -584,19 +593,27 @@ private: } /** - * `GetSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps. The first is to - * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from - * memory else we move to step 3. Step 3 consists on figuring the relationship between the - * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many - * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to - * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface - * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. + * Gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps: + * + * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * + * 2. Check if there are any overlaps at all, if there are none, we just load the texture from + * memory else we move to step 3. + * + * 3. Consists of figuring out the relationship between the candidate texture and the + * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If + * there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we + * have to check if the candidate is a view (layer/mipmap) of the overlap or if the + * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct + * a new surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { @@ -651,7 +668,7 @@ private: // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails - // inmediatly recycle the texture + // immediately recycle the texture for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -720,12 +737,13 @@ private: } /** - * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache that's similar to it. If there are many textures + * Gets the starting address and parameters of a candidate surface and tries to find a + * matching surface within the cache that's similar to it. If there are many textures * or the texture found if entirely incompatible, it will fail. If no texture is found, the * blit will be unsuccessful. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; @@ -777,11 +795,14 @@ private: } /** - * `DeduceBestBlit` gets the a source and destination starting address and parameters, + * Gets the a source and destination starting address and parameters, * and tries to deduce if they are supposed to be depth textures. If so, their * parameters are modified and fixed into so. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the parameters on the candidate surface. + * + * @param src_params The parameters of the candidate surface. + * @param dst_params The parameters of the destination surface. + * @param src_gpu_addr The starting address of the candidate surface. + * @param dst_gpu_addr The starting address of the destination surface. **/ void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { |