diff options
Diffstat (limited to 'src/video_core')
30 files changed, 458 insertions, 290 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 106991969..d7f7d336c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -73,8 +73,6 @@ add_library(video_core STATIC macro/macro_hle.h macro/macro_interpreter.cpp macro/macro_interpreter.h - macro/macro_jit_x64.cpp - macro/macro_jit_x64.h fence_manager.h gpu.cpp gpu.h @@ -245,7 +243,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) @@ -282,8 +280,19 @@ else() -Wno-sign-conversion ) + + # xbyak + set_source_files_properties(macro/macro_jit_x64.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-shadow") endif() if (ARCHITECTURE_x86_64) + target_sources(video_core PRIVATE + macro/macro_jit_x64.cpp + macro/macro_jit_x64.h + ) + target_link_libraries(video_core PUBLIC xbyak) +endif() + +if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) target_link_libraries(video_core PRIVATE dynarmic) endif() diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b1a22b76c..4a2f2c1fd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -117,10 +117,15 @@ void Maxwell3D::InitializeRegisterDefaults() { shadow_state = regs; - mme_inline[MAXWELL3D_REG_INDEX(draw.end)] = true; - mme_inline[MAXWELL3D_REG_INDEX(draw.begin)] = true; - mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; - mme_inline[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true; + draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; + draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; + draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -208,25 +213,21 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(3); case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): return ProcessCBBind(4); - case MAXWELL3D_REG_INDEX(draw.end): - return DrawArrays(); case MAXWELL3D_REG_INDEX(index_buffer32_first): regs.index_buffer.count = regs.index_buffer32_first.count; regs.index_buffer.first = regs.index_buffer32_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); + return ProcessDraw(); case MAXWELL3D_REG_INDEX(index_buffer16_first): regs.index_buffer.count = regs.index_buffer16_first.count; regs.index_buffer.first = regs.index_buffer16_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - return DrawArrays(); + return ProcessDraw(); case MAXWELL3D_REG_INDEX(index_buffer8_first): regs.index_buffer.count = regs.index_buffer8_first.count; regs.index_buffer.first = regs.index_buffer8_first.first; dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - // a macro calls this one over and over, should it increase instancing? - // Used by Hades and likely other Vulkan games. - return DrawArrays(); + return ProcessDraw(); case MAXWELL3D_REG_INDEX(topology_override): use_topology_override = true; return; @@ -261,14 +262,13 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) // Execute the current macro. macro_engine->Execute(macro_positions[entry], parameters); - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } + + ProcessDeferredDraw(); } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - // It is an error to write to a register other than the current macro's ARG register before it - // has finished execution. + // It is an error to write to a register other than the current macro's ARG register before + // it has finished execution. if (executing_macro != 0) { ASSERT(method == executing_macro + 1); } @@ -283,9 +283,33 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - const u32 argument = ProcessShadowRam(method, method_argument); - ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); + if (draw_command[method]) { + regs.reg_array[method] = method_argument; + deferred_draw_method.push_back(method); + auto u32_to_u8 = [&](const u32 argument) { + inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16)); + inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24)); + }; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) { + u32_to_u8(method_argument); + } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) { + u32_to_u8(regs.inline_index_2x16.even); + u32_to_u8(regs.inline_index_2x16.odd); + } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) { + u32_to_u8(regs.inline_index_4x8.index0); + u32_to_u8(regs.inline_index_4x8.index1); + u32_to_u8(regs.inline_index_4x8.index2); + u32_to_u8(regs.inline_index_4x8.index3); + } + } else { + ProcessDeferredDraw(); + + const u32 argument = ProcessShadowRam(method, method_argument); + ProcessDirtyRegisters(method, argument); + ProcessMethodCall(method, argument, method_argument, is_last_call); + } } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -326,55 +350,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, } } -void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { - if (mme_draw.current_mode == MMEDrawMode::Undefined) { - if (mme_draw.gl_begin_consume) { - mme_draw.current_mode = expected_mode; - mme_draw.current_count = count; - mme_draw.instance_count = 1; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; - } - return; - } else { - if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count && - mme_draw.instance_mode && mme_draw.gl_begin_consume) { - mme_draw.instance_count++; - mme_draw.gl_begin_consume = false; - return; - } else { - FlushMMEInlineDraw(); - } - } - // Tail call in case it needs to retry. - StepInstance(expected_mode, count); -} - -void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { - if (mme_inline[method]) { - regs.reg_array[method] = method_argument; - if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || - method == MAXWELL3D_REG_INDEX(index_buffer.count)) { - const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) - ? MMEDrawMode::Array - : MMEDrawMode::Indexed; - StepInstance(expected_mode, method_argument); - } else if (method == MAXWELL3D_REG_INDEX(draw.begin)) { - mme_draw.instance_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged); - mme_draw.gl_begin_consume = true; - } else { - mme_draw.gl_end_count++; - } - } else { - if (mme_draw.current_mode != MMEDrawMode::Undefined) { - FlushMMEInlineDraw(); - } - CallMethod(method, method_argument, true); - } -} - void Maxwell3D::ProcessTopologyOverride() { using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; @@ -404,41 +379,6 @@ void Maxwell3D::ProcessTopologyOverride() { } } -void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, true); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } - mme_draw.current_mode = MMEDrawMode::Undefined; - mme_draw.current_count = 0; - mme_draw.instance_count = 0; - mme_draw.instance_mode = false; - mme_draw.gl_begin_consume = false; - mme_draw.gl_end_count = 0; -} - void Maxwell3D::ProcessMacroUpload(u32 data) { macro_engine->AddCode(regs.load_mme.instruction_ptr++, data); } @@ -573,42 +513,6 @@ void Maxwell3D::ProcessSyncPoint() { rasterizer->SignalSyncPoint(sync_point); } -void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - regs.vertex_buffer.count); - ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); - - // Both instance configuration registers can not be set at the same time. - ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || - regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, - "Illegal combination of instancing parameters"); - - ProcessTopologyOverride(); - - if (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) { - // Increment the current instance *before* drawing. - state.current_instance++; - } else if (regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged) { - // Reset the current instance to 0. - state.current_instance = 0; - } - - const bool is_indexed{regs.index_buffer.count && !regs.vertex_buffer.count}; - if (ShouldExecute()) { - rasterizer->Draw(is_indexed, false); - } - - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if - // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - - // it's possible that it is incorrect and that there is some other register used to specify the - // drawing mode. - if (is_indexed) { - regs.index_buffer.count = 0; - } else { - regs.vertex_buffer.count = 0; - } -} - std::optional<u64> Maxwell3D::GetQueryResult() { switch (regs.report_semaphore.query.report) { case Regs::ReportSemaphore::Report::Payload: @@ -691,4 +595,83 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } +void Maxwell3D::ProcessDraw(u32 instance_count) { + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), + regs.vertex_buffer.count); + + ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?"); + + // Both instance configuration registers can not be set at the same time. + ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First || + regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged, + "Illegal combination of instancing parameters"); + + ProcessTopologyOverride(); + + const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count; + if (ShouldExecute()) { + rasterizer->Draw(is_indexed, instance_count); + } + + if (is_indexed) { + regs.index_buffer.count = 0; + } else { + regs.vertex_buffer.count = 0; + } +} + +void Maxwell3D::ProcessDeferredDraw() { + if (deferred_draw_method.empty()) { + return; + } + + enum class DrawMode { + Undefined, + General, + Instance, + }; + DrawMode draw_mode{DrawMode::Undefined}; + u32 method_count = static_cast<u32>(deferred_draw_method.size()); + u32 method = deferred_draw_method[method_count - 1]; + if (MAXWELL3D_REG_INDEX(draw.end) != method) { + return; + } + draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || + (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) + ? DrawMode::Instance + : DrawMode::General; + u32 instance_count = 0; + if (draw_mode == DrawMode::Instance) { + u32 vertex_buffer_count = 0; + u32 index_buffer_count = 0; + for (u32 index = 0; index < method_count; ++index) { + method = deferred_draw_method[index]; + if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) { + instance_count = ++vertex_buffer_count; + } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) { + instance_count = ++index_buffer_count; + } + } + ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), + "Instance both indexed and direct?"); + } else { + instance_count = 1; + for (u32 index = 0; index < method_count; ++index) { + method = deferred_draw_method[index]; + if (MAXWELL3D_REG_INDEX(draw_inline_index) == method || + MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method || + MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) { + regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4); + regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; + break; + } + } + } + + ProcessDraw(instance_count); + + deferred_draw_method.clear(); + inline_index_draw_indexes.clear(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 75e3b868d..910ab213a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1739,14 +1739,11 @@ public: Footprint_1x1_Virtual = 2, }; - struct InlineIndex4x8Align { + struct InlineIndex4x8 { union { BitField<0, 30, u32> count; BitField<30, 2, u32> start; }; - }; - - struct InlineIndex4x8Index { union { BitField<0, 8, u32> index0; BitField<8, 8, u32> index1; @@ -2836,8 +2833,7 @@ public: u32 depth_write_enabled; ///< 0x12E8 u32 alpha_test_enabled; ///< 0x12EC INSERT_PADDING_BYTES_NOINIT(0x10); - InlineIndex4x8Align inline_index_4x8_align; ///< 0x1300 - InlineIndex4x8Index inline_index_4x8_index; ///< 0x1304 + InlineIndex4x8 inline_index_4x8; ///< 0x1300 D3DCullMode d3d_cull_mode; ///< 0x1308 ComparisonOp depth_test_func; ///< 0x130C f32 alpha_test_ref; ///< 0x1310 @@ -2974,7 +2970,7 @@ public: CullFace gl_cull_face; ///< 0x1920 Viewport::PixelCenter viewport_pixel_center; ///< 0x1924 INSERT_PADDING_BYTES_NOINIT(0x4); - u32 viewport_scale_offset_enbled; ///< 0x192C + u32 viewport_scale_offset_enabled; ///< 0x192C INSERT_PADDING_BYTES_NOINIT(0xC); ViewportClipControl viewport_clip_control; ///< 0x193C UserClip::Op user_clip_op; ///< 0x1940 @@ -3048,8 +3044,6 @@ public: }; std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; - - u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. }; State state{}; @@ -3064,11 +3058,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - /// Write the value to the register identified by method. - void CallMethodFromMME(u32 method, u32 method_argument); - - void FlushMMEInlineDraw(); - bool ShouldExecute() const { return execute_on; } @@ -3081,21 +3070,6 @@ public: return *rasterizer; } - enum class MMEDrawMode : u32 { - Undefined, - Array, - Indexed, - }; - - struct MMEDrawState { - MMEDrawMode current_mode{MMEDrawMode::Undefined}; - u32 current_count{}; - u32 instance_count{}; - bool instance_mode{}; - bool gl_begin_consume{}; - u32 gl_end_count{}; - } mme_draw; - struct DirtyState { using Flags = std::bitset<std::numeric_limits<u8>::max()>; using Table = std::array<u8, Regs::NUM_REGS>; @@ -3105,6 +3079,8 @@ public: Tables tables{}; } dirty; + std::vector<u8> inline_index_draw_indexes; + private: void InitializeRegisterDefaults(); @@ -3164,14 +3140,12 @@ private: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); - /// Handles a write to the VERTEX_END_GL register, triggering a draw. - void DrawArrays(); - /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) void ProcessTopologyOverride(); - // Handles a instance drawcall from MME - void StepInstance(MMEDrawMode expected_mode, u32 count); + void ProcessDraw(u32 instance_count = 1); + + void ProcessDeferredDraw(); /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional<u64> GetQueryResult(); @@ -3184,8 +3158,6 @@ private: /// Start offsets of each macro in macro_memory std::array<u32, 0x80> macro_positions{}; - std::array<bool, Regs::NUM_REGS> mme_inline{}; - /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; /// Parameters that have been submitted to the macro call so far. @@ -3198,6 +3170,9 @@ private: bool execute_on{true}; bool use_topology_override{false}; + + std::array<bool, Regs::NUM_REGS> draw_command{}; + std::vector<u32> deferred_draw_method; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -3402,8 +3377,7 @@ ASSERT_REG_POSITION(alpha_to_coverage_dither, 0x12E0); ASSERT_REG_POSITION(blend_per_target_enabled, 0x12E4); ASSERT_REG_POSITION(depth_write_enabled, 0x12E8); ASSERT_REG_POSITION(alpha_test_enabled, 0x12EC); -ASSERT_REG_POSITION(inline_index_4x8_align, 0x1300); -ASSERT_REG_POSITION(inline_index_4x8_index, 0x1304); +ASSERT_REG_POSITION(inline_index_4x8, 0x1300); ASSERT_REG_POSITION(d3d_cull_mode, 0x1308); ASSERT_REG_POSITION(depth_test_func, 0x130C); ASSERT_REG_POSITION(alpha_test_ref, 0x1310); @@ -3508,7 +3482,7 @@ ASSERT_REG_POSITION(gl_cull_test_enabled, 0x1918); ASSERT_REG_POSITION(gl_front_face, 0x191C); ASSERT_REG_POSITION(gl_cull_face, 0x1920); ASSERT_REG_POSITION(viewport_pixel_center, 0x1924); -ASSERT_REG_POSITION(viewport_scale_offset_enbled, 0x192C); +ASSERT_REG_POSITION(viewport_scale_offset_enabled, 0x192C); ASSERT_REG_POSITION(viewport_clip_control, 0x193C); ASSERT_REG_POSITION(user_clip_op, 0x1940); ASSERT_REG_POSITION(render_enable_override, 0x1944); diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index f61d5998e..505d81c1e 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -16,7 +16,10 @@ #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" #include "video_core/macro/macro_interpreter.h" + +#ifdef ARCHITECTURE_x86_64 #include "video_core/macro/macro_jit_x64.h" +#endif namespace Tegra { diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 8a8adbb42..f896591bf 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -22,35 +22,29 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); maxwell3d.regs.global_base_instance_index = parameters[5]; - maxwell3d.mme_draw.instance_count = instance_count; maxwell3d.regs.global_base_vertex_index = parameters[3]; maxwell3d.regs.index_buffer.count = parameters[1]; maxwell3d.regs.index_buffer.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.index_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { - const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.regs.vertex_buffer.first = parameters[3]; maxwell3d.regs.vertex_buffer.count = parameters[1]; maxwell3d.regs.global_base_instance_index = parameters[4]; maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); - maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, instance_count); } maxwell3d.regs.vertex_buffer.count = 0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { @@ -63,24 +57,21 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.regs.global_base_vertex_index = element_base; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, element_base); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, element_base, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); } // Multidraw Indirect @@ -91,11 +82,9 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_buffer.count = 0; maxwell3d.regs.global_base_vertex_index = 0x0; maxwell3d.regs.global_base_instance_index = 0x0; - maxwell3d.mme_draw.instance_count = 0; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, 0x0); - maxwell3d.CallMethodFromMME(0x8e5, 0x0); - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, 0x0, true); + maxwell3d.CallMethod(0x8e5, 0x0, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; }); const u32 start_indirect = parameters[0]; @@ -127,15 +116,13 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_buffer.count = num_vertices; maxwell3d.regs.global_base_vertex_index = base_vertex; maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.mme_draw.instance_count = instance_count; - maxwell3d.CallMethodFromMME(0x8e3, 0x640); - maxwell3d.CallMethodFromMME(0x8e4, base_vertex); - maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.CallMethod(0x8e3, 0x640, true); + maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, instance_count); } - maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } } diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index f670b1bca..c0d32c112 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -335,7 +335,7 @@ void MacroInterpreterImpl::SetMethodAddress(u32 address) { } void MacroInterpreterImpl::Send(u32 value) { - maxwell3d.CallMethodFromMME(method_address.address, value); + maxwell3d.CallMethod(method_address.address, value, true); // Increment the method address by the method increment. method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index a302a9603..25c1ce798 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -346,7 +346,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { } void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethodFromMME(method_address.address, value); + maxwell3d->CallMethod(method_address.address, value, true); } void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 384350dbd..8c8dfcca6 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -45,7 +45,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 kind_valus.fill(PTEKind::INVALID); big_kinds.resize(big_page_table_size / 32, kind_valus); entries.resize(page_table_size / 32, 0); - kinds.resize(big_page_table_size / 32, kind_valus); + kinds.resize(page_table_size / 32, kind_valus); } MemoryManager::~MemoryManager() = default; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d2d40884c..1cbfef090 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -40,7 +40,7 @@ public: virtual ~RasterizerInterface() = default; /// Dispatches a draw invocation - virtual void Draw(bool is_indexed, bool is_instanced) = 0; + virtual void Draw(bool is_indexed, u32 instance_count) = 0; /// Clear the current framebuffer virtual void Clear() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 08f4d69ab..6af4ae793 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -29,17 +29,17 @@ constexpr std::array PROGRAM_LUT{ [[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { switch (gl_format) { case GL_RGBA8_SNORM: - return GL_RGBA8; + return GL_RGBA8I; case GL_R8_SNORM: - return GL_R8; + return GL_R8I; case GL_RGBA16_SNORM: - return GL_RGBA16; + return GL_RGBA16I; case GL_R16_SNORM: - return GL_R16; + return GL_R16I; case GL_RG16_SNORM: - return GL_RG16; + return GL_RG16I; case GL_RG8_SNORM: - return GL_RG8; + return GL_RG8I; default: return gl_format; } @@ -96,9 +96,6 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; const GLenum texture_format{GetTextureBufferFormat(gl_format)}; - if (texture_format != gl_format) { - LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); - } glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); views.push_back({ .offset = offset, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 1d20a79ec..c115dabe1 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -503,6 +503,17 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { float_image_scaling_mask, down_factor, 0.0f); } } + if (info.uses_render_area) { + const auto render_area_width(static_cast<GLfloat>(regs.surface_clip.width)); + const auto render_area_height(static_cast<GLfloat>(regs.surface_clip.height)); + if (use_assembly) { + glProgramLocalParameter4fARB(AssemblyStage(stage), 1, render_area_width, + render_area_height, 0.0f, 0.0f); + } else { + glProgramUniform4f(source_programs[stage].handle, 1, render_area_width, + render_area_height, 0.0f, 0.0f); + } + } }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e5c09a969..8a8b5ce54 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -205,7 +205,7 @@ void RasterizerOpenGL::Clear() { ++num_queued_commands; } -void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { +void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -222,14 +222,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + SyncState(); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology); BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.global_base_instance_index); - const GLsizei num_instances = - static_cast<GLsizei>(is_instanced ? maxwell3d->mme_draw.instance_count : 1); + const GLsizei num_instances = static_cast<GLsizei>(instance_count); if (is_indexed) { const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.global_base_vertex_index); const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_buffer.count); @@ -617,6 +618,16 @@ void RasterizerOpenGL::SyncViewport() { } flags[Dirty::Viewport0 + index] = false; + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast<GLfloat>(regs.surface_clip.x); + const auto y = static_cast<GLfloat>(regs.surface_clip.y); + const auto width = static_cast<GLfloat>(regs.surface_clip.width); + const auto height = static_cast<GLfloat>(regs.surface_clip.height); + glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f, + height != 0.0f ? height : 1.0f); + continue; + } + const auto& src = regs.viewport_transform[index]; GLfloat x = conv(src.translate_x - src.scale_x); GLfloat y = conv(src.translate_y - src.scale_y); @@ -1129,6 +1140,16 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerOpenGL::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count); + buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes); + buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count); +} + AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 45131b785..793e0d608 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -68,7 +68,7 @@ public: StateTracker& state_tracker_); ~RasterizerOpenGL() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -199,6 +199,8 @@ private: /// End a transform feedback void EndTransformFeedback(); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e94cfdb1a..3fe04a115 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -49,7 +49,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 6; +constexpr u32 CACHE_VERSION = 7; template <typename Container> auto MakeSpan(Container& container) { @@ -76,7 +76,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, } break; case Shader::Stage::TessellationEval: - info.tess_clockwise = key.tessellation_clockwise != 0; + // Flip the face, as OpenGL's drawing is flipped. + info.tess_clockwise = key.tessellation_clockwise == 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { case Maxwell::Tessellation::DomainType::Isolines: @@ -218,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_float16 = false, .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), + .support_snorm_render_buffer = false, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index a359f96f1..d53b422ca 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -70,8 +70,8 @@ void SetupDirtyViewports(Tables& tables) { FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports); - tables[0][OFF(viewport_scale_offset_enbled)] = ViewportTransform; - tables[1][OFF(viewport_scale_offset_enbled)] = Viewports; + tables[0][OFF(viewport_scale_offset_enabled)] = ViewportTransform; + tables[1][OFF(viewport_scale_offset_enabled)] = Viewports; } void SetupDirtyScissors(Tables& tables) { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index b24f3424a..b7843e995 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -68,13 +68,15 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + using Shader::Backend::SPIRV::RenderAreaLayout; using Shader::Backend::SPIRV::RescalingLayout; const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u; const VkPushConstantRange range{ .stageFlags = static_cast<VkShaderStageFlags>( is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset, + .size = static_cast<u32>(sizeof(RescalingLayout)) - size_offset + + static_cast<u32>(sizeof(RenderAreaLayout)), }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -167,6 +169,12 @@ private: u32 image_bit{1u}; }; +class RenderAreaPushConstant { +public: + bool uses_render_area{}; + std::array<f32, 4> words{}; +}; + inline void PushImageDescriptors(TextureCache& texture_cache, UpdateDescriptorQueue& update_descriptor_queue, const Shader::Info& info, RescalingPushConstant& rescaling, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index cb7fa2078..89426121f 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -480,11 +480,15 @@ void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { fsr.reset(); } - if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { + if (framebuffer.width == raw_width && framebuffer.height == raw_height && + framebuffer.pixel_format == pixel_format && !raw_images.empty()) { return; } + raw_width = framebuffer.width; raw_height = framebuffer.height; + pixel_format = framebuffer.pixel_format; + ReleaseRawImages(); CreateStagingBuffer(framebuffer); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 29e2ea925..a2b73ec54 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -28,6 +28,10 @@ namespace VideoCore { class RasterizerInterface; } +namespace Service::android { +enum class PixelFormat : u32; +} + namespace Vulkan { struct ScreenInfo; @@ -156,6 +160,7 @@ private: u32 raw_width = 0; u32 raw_height = 0; + Service::android::PixelFormat pixel_format{}; std::unique_ptr<FSR> fsr; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c3f66c8a3..1aa116cea 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -31,6 +31,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RENDERAREA_LAYOUT_OFFSET; using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET; using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; @@ -433,12 +434,19 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { update_descriptor_queue.Acquire(); RescalingPushConstant rescaling; + RenderAreaPushConstant render_area; const VkSampler* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, samplers_it, views_it); + const auto& info{stage_infos[0]}; + if (info.uses_render_area) { + render_area.uses_render_area = true; + render_area.words = {static_cast<float>(regs.surface_clip.width), + static_cast<float>(regs.surface_clip.height)}; + } }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); @@ -455,10 +463,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } - ConfigureDraw(rescaling); + ConfigureDraw(rescaling, render_area); } -void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { +void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, + const RenderAreaPushConstant& render_area) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -474,7 +483,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), - is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) { + is_rescaling, update_rescaling, + uses_render_area = render_area.uses_render_area, + render_area_data = render_area.words](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } @@ -488,6 +499,11 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), &scale_down_factor); } + if (uses_render_area) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RENDERAREA_LAYOUT_OFFSET, sizeof(render_area_data), + &render_area_data); + } if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85602592b..6bf577d25 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -62,6 +62,7 @@ class Device; class PipelineStatistics; class RenderPassCache; class RescalingPushConstant; +class RenderAreaPushConstant; class Scheduler; class UpdateDescriptorQueue; @@ -119,7 +120,8 @@ private: template <typename Spec> void ConfigureImpl(bool is_indexed); - void ConfigureDraw(const RescalingPushConstant& rescaling); + void ConfigureDraw(const RescalingPushConstant& rescaling, + const RenderAreaPushConstant& render_are); void MakePipeline(VkRenderPass render_pass); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 13d5a1f67..d4b0a542a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -53,7 +53,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 6; +constexpr u32 CACHE_VERSION = 7; template <typename Container> auto MakeSpan(Container& container) { @@ -166,6 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program } break; case Shader::Stage::TessellationEval: + info.tess_clockwise = key.state.tessellation_clockwise != 0; info.tess_primitive = [&key] { const u32 raw{key.state.tessellation_primitive.Value()}; switch (static_cast<Maxwell::Tessellation::DomainType>(raw)) { @@ -325,6 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_int64 = device.IsShaderInt64Supported(), .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, + .support_snorm_render_buffer = true, }; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 47dfb45a1..f69c0c50f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -127,11 +127,10 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 return scissor; } -DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, - bool is_indexed) { +DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) { DrawParams params{ .base_instance = regs.global_base_instance_index, - .num_instances = is_instanced ? num_instances : 1, + .num_instances = num_instances, .base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count, .first_index = is_indexed ? regs.index_buffer.first : 0, @@ -157,12 +156,10 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), update_descriptor_queue(device, scheduler), blit_image(device, scheduler, state_tracker, descriptor_pool), - astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, - memory_allocator), render_pass_cache(device), texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, - blit_image, astc_decoder_pass, - render_pass_cache}, + blit_image, render_pass_cache, + descriptor_pool, update_descriptor_queue}, texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), @@ -177,7 +174,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; -void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { +void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { MICROPROFILE_SCOPE(Vulkan_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); @@ -194,13 +191,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); + BindInlineIndexBuffer(); + BeginTransformFeedback(); UpdateDynamicStates(); const auto& regs{maxwell3d->regs}; - const u32 num_instances{maxwell3d->mme_draw.instance_count}; - const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + const u32 num_instances{instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, @@ -304,14 +303,19 @@ void RasterizerVulkan::Clear() { } } - scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - const VkClearAttachment attachment{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .colorAttachment = color_attachment, - .clearValue = clear_value, - }; - cmdbuf.ClearAttachments(attachment, clear_rect); - }); + if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && + regs.clear_surface.A) { + scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; + cmdbuf.ClearAttachments(attachment, clear_rect); + }); + } else { + UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel"); + } } if (!use_depth && !use_stencil) { @@ -679,6 +683,22 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg if (!state_tracker.TouchViewports()) { return; } + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast<float>(regs.surface_clip.x); + const auto y = static_cast<float>(regs.surface_clip.y); + const auto width = static_cast<float>(regs.surface_clip.width); + const auto height = static_cast<float>(regs.surface_clip.height); + VkViewport viewport{ + .x = x, + .y = y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); }); + return; + } const bool is_rescaling{texture_cache.IsRescaling()}; const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; const std::array viewports{ @@ -1009,4 +1029,17 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } +void RasterizerVulkan::BindInlineIndexBuffer() { + if (maxwell3d->inline_index_draw_indexes.empty()) { + return; + } + const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size()); + auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count); + std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count); + buffer_cache_runtime.BindIndexBuffer( + maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format, + maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer, + static_cast<u32>(buffer.offset), data_count); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4cde3c983..b0bc306f5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -64,7 +64,7 @@ public: StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; - void Draw(bool is_indexed, bool is_instanced) override; + void Draw(bool is_indexed, u32 instance_count) override; void Clear() override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -141,6 +141,8 @@ private: void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + void BindInlineIndexBuffer(); + Tegra::GPU& gpu; ScreenInfo& screen_info; @@ -153,7 +155,6 @@ private: DescriptorPool descriptor_pool; UpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; - ASTCDecoderPass astc_decoder_pass; RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c04aad08f..929216749 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -144,7 +144,6 @@ private: using FuncType = TypedCommand<T>; static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); - recorded_counts++; command_offset = Common::AlignUp(command_offset, alignof(FuncType)); if (command_offset > sizeof(data) - sizeof(FuncType)) { return false; @@ -166,7 +165,7 @@ private: } bool Empty() const { - return recorded_counts == 0; + return command_offset == 0; } bool HasSubmit() const { @@ -177,7 +176,6 @@ private: Command* first = nullptr; Command* last = nullptr; - size_t recorded_counts = 0; size_t command_offset = 0; bool submit = false; alignas(std::max_align_t) std::array<u8, 0x8000> data{}; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index b87c3be66..edb41b171 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -51,7 +51,7 @@ Flags MakeInvalidationFlags() { void SetupDirtyViewports(Tables& tables) { FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); - tables[0][OFF(viewport_scale_offset_enbled)] = Viewports; + tables[0][OFF(viewport_scale_offset_enabled)] = Viewports; tables[1][OFF(window_origin)] = Viewports; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 305ad8aee..853b80d8a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -791,12 +791,17 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched MemoryAllocator& memory_allocator_, StagingBufferPool& staging_buffer_pool_, BlitImageHelper& blit_image_helper_, - ASTCDecoderPass& astc_decoder_pass_, - RenderPassCache& render_pass_cache_) + RenderPassCache& render_pass_cache_, + DescriptorPool& descriptor_pool, + UpdateDescriptorQueue& update_descriptor_queue) : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, - astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_}, - resolution{Settings::values.resolution_info} {} + render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} { + if (Settings::values.accelerate_astc) { + astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, + update_descriptor_queue, memory_allocator); + } +} void TextureCacheRuntime::Finish() { scheduler.Finish(); @@ -1782,17 +1787,17 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, const auto& resolution = runtime.resolution; - u32 width = 0; - u32 height = 0; + u32 width = std::numeric_limits<u32>::max(); + u32 height = std::numeric_limits<u32>::max(); for (size_t index = 0; index < NUM_RT; ++index) { const ImageView* const color_buffer = color_buffers[index]; if (!color_buffer) { renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } - width = std::max(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width) + width = std::min(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width) : color_buffer->size.width); - height = std::max(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height) + height = std::min(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height) : color_buffer->size.height); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; @@ -1804,9 +1809,9 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, } const size_t num_colors = attachments.size(); if (depth_buffer) { - width = std::max(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width) + width = std::min(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width) : depth_buffer->size.width); - height = std::max(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height) + height = std::min(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height) : depth_buffer->size.height); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; @@ -1845,7 +1850,7 @@ void TextureCacheRuntime::AccelerateImageUpload( Image& image, const StagingBufferRef& map, std::span<const VideoCommon::SwizzleParameters> swizzles) { if (IsPixelFormatASTC(image.info.format)) { - return astc_decoder_pass.Assemble(image, map, swizzles); + return astc_decoder_pass->Assemble(image, map, swizzles); } ASSERT(false); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0b7ac0df1..7ec0df134 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -6,6 +6,7 @@ #include <span> #include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -25,14 +26,15 @@ using VideoCommon::RenderTargets; using VideoCommon::SlotVector; using VideoCore::Surface::PixelFormat; -class ASTCDecoderPass; class BlitImageHelper; +class DescriptorPool; class Device; class Image; class ImageView; class Framebuffer; class RenderPassCache; class StagingBufferPool; +class UpdateDescriptorQueue; class Scheduler; class TextureCacheRuntime { @@ -41,8 +43,9 @@ public: MemoryAllocator& memory_allocator_, StagingBufferPool& staging_buffer_pool_, BlitImageHelper& blit_image_helper_, - ASTCDecoderPass& astc_decoder_pass_, - RenderPassCache& render_pass_cache_); + RenderPassCache& render_pass_cache_, + DescriptorPool& descriptor_pool, + UpdateDescriptorQueue& update_descriptor_queue); void Finish(); @@ -97,8 +100,8 @@ public: MemoryAllocator& memory_allocator; StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; - ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; + std::optional<ASTCDecoderPass> astc_decoder_pass; const Settings::ResolutionScalingInfo& resolution; constexpr static size_t indexing_slots = 8 * sizeof(size_t); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index fbabb3219..f24f320b6 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -19,6 +19,7 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/memory_manager.h" #include "video_core/shader_environment.h" +#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/textures/texture.h" namespace VideoCommon { @@ -33,7 +34,7 @@ static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast<u64>(index) << 32) | offset; } -static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { +static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& entry) { switch (entry.texture_type) { case Tegra::Texture::TextureType::Texture1D: return Shader::TextureType::Color1D; @@ -59,6 +60,26 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { } } +static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) { + switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type, + entry.a_type, entry.srgb_conversion)) { + case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM: + return Shader::TexturePixelFormat::A8B8G8R8_SNORM; + case VideoCore::Surface::PixelFormat::R8_SNORM: + return Shader::TexturePixelFormat::R8_SNORM; + case VideoCore::Surface::PixelFormat::R8G8_SNORM: + return Shader::TexturePixelFormat::R8G8_SNORM; + case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM: + return Shader::TexturePixelFormat::R16G16B16A16_SNORM; + case VideoCore::Surface::PixelFormat::R16G16_SNORM: + return Shader::TexturePixelFormat::R16G16_SNORM; + case VideoCore::Surface::PixelFormat::R16_SNORM: + return Shader::TexturePixelFormat::R16_SNORM; + default: + return Shader::TexturePixelFormat::OTHER; + } +} + static std::string_view StageToPrefix(Shader::Stage stage) { switch (stage) { case Shader::Stage::VertexB: @@ -178,22 +199,31 @@ void GenericEnvironment::Dump(u64 hash) { void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast<u64>(CachedSize())}; const u64 num_texture_types{static_cast<u64>(texture_types.size())}; + const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), + sizeof(num_texture_pixel_formats)) .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast<const char*>(&viewport_transform_state), + sizeof(viewport_transform_state)) .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) .write(reinterpret_cast<const char*>(code.data()), code_size); for (const auto& [key, type] : texture_types) { file.write(reinterpret_cast<const char*>(&key), sizeof(key)) .write(reinterpret_cast<const char*>(&type), sizeof(type)); } + for (const auto& [key, format] : texture_pixel_formats) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&format), sizeof(format)); + } for (const auto& [key, type] : cbuf_values) { file.write(reinterpret_cast<const char*>(&key), sizeof(key)) .write(reinterpret_cast<const char*>(&type), sizeof(type)); @@ -237,15 +267,13 @@ std::optional<u64> GenericEnvironment::TryFindSize() { return std::nullopt; } -Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, - bool via_header_index, u32 raw) { +Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - const Shader::TextureType result{ConvertType(entry)}; - texture_types.emplace(raw, result); - return result; + return entry; } GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -305,8 +333,27 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; - return ReadTextureTypeImpl(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, - handle); + auto entry = + ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle); + const Shader::TextureType result{ConvertTextureType(entry)}; + texture_types.emplace(handle, result); + return result; +} + +Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; + auto entry = + ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle); + const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry)); + texture_pixel_formats.emplace(handle, result); + return result; +} + +u32 GraphicsEnvironment::ReadViewportTransformState() { + const auto& regs{maxwell3d->regs}; + viewport_transform_state = regs.viewport_scale_offset_enabled; + return viewport_transform_state; } ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, @@ -337,21 +384,41 @@ u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + const Shader::TextureType result{ConvertTextureType(entry)}; + texture_types.emplace(handle, result); + return result; +} + +Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); + const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry)); + texture_pixel_formats.emplace(handle, result); + return result; +} + +u32 ComputeEnvironment::ReadViewportTransformState() { + return viewport_transform_state; } void FileEnvironment::Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_texture_pixel_formats{}; u64 num_cbuf_values{}; file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast<char*>(&num_texture_pixel_formats), + sizeof(num_texture_pixel_formats)) .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state)) .read(reinterpret_cast<char*>(&stage), sizeof(stage)); code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast<char*>(code.get()), code_size); @@ -362,6 +429,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast<char*>(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_texture_pixel_formats; ++i) { + u32 key; + Shader::TexturePixelFormat format; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&format), sizeof(format)); + texture_pixel_formats.emplace(key, format); + } for (size_t i = 0; i < num_cbuf_values; ++i) { u64 key; u32 value; @@ -409,6 +483,18 @@ Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { return it->second; } +Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) { + const auto it{texture_pixel_formats.find(handle)}; + if (it == texture_pixel_formats.end()) { + throw Shader::LogicError("Uncached read texture pixel format"); + } + return it->second; +} + +u32 FileEnvironment::ReadViewportTransformState() { + return viewport_transform_state; +} + u32 FileEnvironment::LocalMemorySize() const { return local_memory_size; } diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 8b3b8e9f5..bb55b029f 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -63,14 +63,15 @@ public: protected: std::optional<u64> TryFindSize(); - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw); + Tegra::Texture::TICEntry ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw); Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector<u64> code; std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; std::unordered_map<u64, u32> cbuf_values; u32 local_memory_size{}; @@ -85,6 +86,8 @@ protected: u32 cached_highest = 0; u32 initial_offset = 0; + u32 viewport_transform_state = 1; + bool has_unbound_instructions = false; }; @@ -102,6 +105,10 @@ public: Shader::TextureType ReadTextureType(u32 handle) override; + Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + u32 ReadViewportTransformState() override; + private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -120,6 +127,10 @@ public: Shader::TextureType ReadTextureType(u32 handle) override; + Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + u32 ReadViewportTransformState() override; + private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -143,6 +154,10 @@ public: [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + + [[nodiscard]] u32 ReadViewportTransformState() override; + [[nodiscard]] u32 LocalMemorySize() const override; [[nodiscard]] u32 SharedMemorySize() const override; @@ -156,6 +171,7 @@ public: private: std::unique_ptr<u64[]> code; std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; std::unordered_map<u64, u32> cbuf_values; std::array<u32, 3> workgroup_size{}; u32 local_memory_size{}; @@ -164,6 +180,7 @@ private: u32 read_lowest{}; u32 read_highest{}; u32 initial_offset{}; + u32 viewport_transform_state = 1; }; void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 1223df5a0..e8c908b42 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -516,7 +516,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; - UNIMPLEMENTED_IF(info.tile_width_spacing > 0); UNIMPLEMENTED_IF(copy.image_offset.x != 0); UNIMPLEMENTED_IF(copy.image_offset.y != 0); UNIMPLEMENTED_IF(copy.image_offset.z != 0); |