From de0ab806df4575df93068e128d911dabbf396d2c Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 29 Oct 2018 23:36:03 -0400 Subject: maxwell_3d: Restructure macro upload to use a single macro code memory. - Fixes an issue where macros could be skipped. - Fixes rendering of distant objects in Super Mario Odyssey. --- src/video_core/engines/maxwell_3d.cpp | 26 ++++++++++++++++++-------- src/video_core/engines/maxwell_3d.h | 25 +++++++++++++++++++++---- src/video_core/macro_interpreter.cpp | 19 ++++++++++--------- src/video_core/macro_interpreter.h | 12 ++++++------ 4 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7357d20d1..d79c50919 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { // Reset the current macro. executing_macro = 0; - // The requested macro must have been uploaded already. - auto macro_code = uploaded_macros.find(method); - if (macro_code == uploaded_macros.end()) { - LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + // Lookup the macro offset + const u32 entry{(method - MacroRegistersStart) >> 1}; + const auto& search{macro_offsets.find(entry)}; + if (search == macro_offsets.end()) { + LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); + UNREACHABLE(); return; } // Execute the current macro. - macro_interpreter.Execute(macro_code->second, std::move(parameters)); + macro_interpreter.Execute(search->second, std::move(parameters)); } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { @@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { ProcessMacroUpload(value); break; } + case MAXWELL3D_REG_INDEX(macros.bind): { + ProcessMacroBind(value); + break; + } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): @@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } void Maxwell3D::ProcessMacroUpload(u32 data) { - // Store the uploaded macro code to interpret them when they're called. - auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; - macro.push_back(data); + ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), + "upload_address exceeded macro_memory size!"); + macro_memory[regs.macros.upload_address++] = data; +} + +void Maxwell3D::ProcessMacroBind(u32 data) { + macro_offsets[regs.macros.entry] = data; } void Maxwell3D::ProcessQueryGet() { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 443affc36..50873813e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,12 +475,13 @@ public: INSERT_PADDING_WORDS(0x45); struct { - INSERT_PADDING_WORDS(1); + u32 upload_address; u32 data; u32 entry; + u32 bind; } macros; - INSERT_PADDING_WORDS(0x189); + INSERT_PADDING_WORDS(0x188); u32 tfb_enabled; @@ -994,12 +995,25 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than + /// we've seen used. + using MacroMemory = std::array; + + /// Gets a reference to macro memory. + const MacroMemory& GetMacroMemory() const { + return macro_memory; + } + private: void InitializeRegisterDefaults(); VideoCore::RasterizerInterface& rasterizer; - std::unordered_map> uploaded_macros; + /// Start offsets of each macro in macro_memory + std::unordered_map macro_offsets; + + /// Memory for macro code + MacroMemory macro_memory; /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; @@ -1022,9 +1036,12 @@ private: */ void CallMacroMethod(u32 method, std::vector parameters); - /// Handles writes to the macro uploading registers. + /// Handles writes to the macro uploading register. void ProcessMacroUpload(u32 data); + /// Handles writes to the macro bind register. + void ProcessMacroBind(u32 data); + /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(); diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index f6af132fb..335a8d407 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -11,7 +11,7 @@ namespace Tegra { MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} -void MacroInterpreter::Execute(const std::vector& code, std::vector parameters) { +void MacroInterpreter::Execute(u32 offset, std::vector parameters) { Reset(); registers[1] = parameters[0]; this->parameters = std::move(parameters); @@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector& code, std::vector pa // Execute the code until we hit an exit condition. bool keep_executing = true; while (keep_executing) { - keep_executing = Step(code, false); + keep_executing = Step(offset, false); } // Assert the the macro used all the input parameters @@ -37,10 +37,10 @@ void MacroInterpreter::Reset() { next_parameter_index = 1; } -bool MacroInterpreter::Step(const std::vector& code, bool is_delay_slot) { +bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { u32 base_address = pc; - Opcode opcode = GetOpcode(code); + Opcode opcode = GetOpcode(offset); pc += 4; // Update the program counter if we were delayed @@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector& code, bool is_delay_slot) { delayed_pc = base_address + opcode.GetBranchTarget(); // Execute one more instruction due to the delay slot. - return Step(code, true); + return Step(offset, true); } break; } @@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector& code, bool is_delay_slot) { // Exit has a delay slot, execute the next instruction // Note: Executing an exit during a branch delay slot will cause the instruction at the // branch target to be executed before exiting. - Step(code, true); + Step(offset, true); return false; } return true; } -MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector& code) const { +MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { + const auto& macro_memory{maxwell3d.GetMacroMemory()}; ASSERT((pc % sizeof(u32)) == 0); - ASSERT(pc < code.size() * sizeof(u32)); - return {code[pc / sizeof(u32)]}; + ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); + return {macro_memory[offset + pc / sizeof(u32)]}; } u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 773684bde..62d1ce289 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -22,10 +22,10 @@ public: /** * Executes the macro code with the specified input parameters. - * @param code The macro byte code to execute - * @param parameters The parameters of the macro + * @param offset Offset to start execution at. + * @param parameters The parameters of the macro. */ - void Execute(const std::vector& code, std::vector parameters); + void Execute(u32 offset, std::vector parameters); private: enum class Operation : u32 { @@ -110,11 +110,11 @@ private: /** * Executes a single macro instruction located at the current program counter. Returns whether * the interpreter should keep running. - * @param code The macro code to execute. + * @param offset Offset to start execution at. * @param is_delay_slot Whether the current step is being executed due to a delay slot in a * previous instruction. */ - bool Step(const std::vector& code, bool is_delay_slot); + bool Step(u32 offset, bool is_delay_slot); /// Calculates the result of an ALU operation. src_a OP src_b; u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; @@ -127,7 +127,7 @@ private: bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; /// Reads an opcode at the current program counter location. - Opcode GetOpcode(const std::vector& code) const; + Opcode GetOpcode(u32 offset) const; /// Returns the specified register's value. Register 0 is hardcoded to always return 0. u32 GetRegister(u32 register_id) const; -- cgit v1.2.3