diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/draw_manager.cpp | 62 | ||||
-rw-r--r-- | src/video_core/engines/draw_manager.h | 45 | ||||
-rw-r--r-- | src/video_core/engines/engine_interface.h | 24 | ||||
-rw-r--r-- | src/video_core/engines/engine_upload.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 16 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.h | 3 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 14 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h | 2 | ||||
-rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 11 | ||||
-rw-r--r-- | src/video_core/engines/kepler_memory.h | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 183 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 91 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 33 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 2 |
14 files changed, 452 insertions, 38 deletions
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 3a78421f6..1d22d25f1 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { LOG_WARNING(HW_GPU, "(STUBBED) called"); break; } + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { + DrawTexture(); + break; + } default: break; } @@ -91,6 +95,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind ProcessDraw(true, num_instances); } +void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { + draw_state.topology = topology; + + ProcessDrawIndirect(); +} + +void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, + u32 index_count) { + const auto& regs{maxwell3d->regs}; + draw_state.topology = topology; + draw_state.index_buffer = regs.index_buffer; + draw_state.index_buffer.first = index_first; + draw_state.index_buffer.count = index_count; + + ProcessDrawIndirect(); +} + void DrawManager::SetInlineIndexBuffer(u32 index) { draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff)); draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8)); @@ -162,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } +void DrawManager::DrawTexture() { + const auto& regs{maxwell3d->regs}; + draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; + draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f; + const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f; + const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f; + const bool lower_left{regs.window_origin.mode != + Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; + if (lower_left) { + draw_texture_state.dst_y0 -= dst_height; + } + draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; + draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; + draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; + draw_texture_state.src_x1 = + (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + + draw_texture_state.src_x0; + draw_texture_state.src_y1 = + (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + + draw_texture_state.src_y0; + draw_texture_state.src_sampler = regs.draw_texture.src_sampler; + draw_texture_state.src_texture = regs.draw_texture.src_texture; + + maxwell3d->rasterizer->DrawTexture(); +} + void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; switch (regs.primitive_topology_control) { @@ -198,4 +246,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { maxwell3d->rasterizer->Draw(draw_indexed, instance_count); } } + +void DrawManager::ProcessDrawIndirect() { + LOG_TRACE( + HW_GPU, + "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}", + draw_state.topology, indirect_state.is_indexed, indirect_state.include_count, + indirect_state.buffer_size, indirect_state.max_draw_counts); + + UpdateTopology(); + + if (maxwell3d->ShouldExecute()) { + maxwell3d->rasterizer->DrawIndirect(); + } +} } // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 0e6930a9c..7c22c49f1 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,29 @@ public: std::vector<u8> inline_index_draw_indexes; }; + struct DrawTextureState { + f32 dst_x0; + f32 dst_y0; + f32 dst_x1; + f32 dst_y1; + f32 src_x0; + f32 src_y0; + f32 src_x1; + f32 src_y1; + u32 src_sampler; + u32 src_texture; + }; + + struct IndirectParams { + bool is_indexed; + bool include_count; + GPUVAddr count_start_address; + GPUVAddr indirect_start_address; + size_t buffer_size; + size_t max_draw_counts; + size_t stride; + }; + explicit DrawManager(Maxwell3D* maxwell_3d); void ProcessMethodCall(u32 method, u32 argument); @@ -46,10 +69,26 @@ public: void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances); + void DrawArrayIndirect(PrimitiveTopology topology); + + void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); + const State& GetDrawState() const { return draw_state; } + const DrawTextureState& GetDrawTextureState() const { + return draw_texture_state; + } + + IndirectParams& GetIndirectParams() { + return indirect_state; + } + + const IndirectParams& GetIndirectParams() const { + return indirect_state; + } + private: void SetInlineIndexBuffer(u32 index); @@ -59,11 +98,17 @@ private: void DrawIndexSmall(u32 argument); + void DrawTexture(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); + void ProcessDrawIndirect(); + Maxwell3D* maxwell3d{}; State draw_state{}; + DrawTextureState draw_texture_state{}; + IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 26cde8584..392322358 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -3,6 +3,10 @@ #pragma once +#include <bitset> +#include <limits> +#include <vector> + #include "common/common_types.h" namespace Tegra::Engines { @@ -17,6 +21,26 @@ public: /// Write multiple values to the register identified by method. virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) = 0; + + void ConsumeSink() { + if (method_sink.empty()) { + return; + } + ConsumeSinkImpl(); + } + + std::bitset<std::numeric_limits<u16>::max()> execution_mask{}; + std::vector<std::pair<u32, u32>> method_sink{}; + bool current_dirty{}; + GPUVAddr current_dma_segment; + +protected: + virtual void ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + CallMethod(method, value, true); + } + method_sink.clear(); + } }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index cea1dd8b0..7f5a0c29d 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, x_elements, regs.line_count, regs.dest.BlockHeight(), regs.dest.BlockDepth(), regs.line_length_in); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index c6478ae85..a126c359c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/sw_blitter/blitter.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" @@ -20,11 +21,14 @@ namespace Tegra::Engines { using namespace Texture; -Fermi2D::Fermi2D(MemoryManager& memory_manager_) { - sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); +Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} { + sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager); // Nvidia's OpenGL driver seems to assume these values regs.src.depth = 1; regs.dst.depth = 1; + + execution_mask.reset(); + execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; } Fermi2D::~Fermi2D() = default; @@ -49,6 +53,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 } } +void Fermi2D::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void Fermi2D::Blit() { MICROPROFILE_SCOPE(GPU_BlitEngine); LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", @@ -94,6 +105,7 @@ void Fermi2D::Blit() { config.src_x0 = 0; } + memory_manager.FlushCaching(); if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { sw_blitter->Blit(src, regs.dst, config); } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 100b21bac..705b323e1 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -305,10 +305,13 @@ public: private: VideoCore::RasterizerInterface* rasterizer = nullptr; std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; + MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. void Blit(); + + void ConsumeSinkImpl() override; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e5c622155..601095f03 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -14,7 +14,12 @@ namespace Tegra::Engines { KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) - : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} + : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { + execution_mask.reset(); + execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; + execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; + execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; +} KeplerCompute::~KeplerCompute() = default; @@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) upload_state.BindRasterizer(rasterizer); } +void KeplerCompute::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerCompute register, increase the size of the Regs structure"); diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index e154e3f06..2092e685f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -204,6 +204,8 @@ public: private: void ProcessLaunch(); + void ConsumeSinkImpl() override; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 08045d1cf..c026801a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default; void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { upload_state.BindRasterizer(rasterizer_); + + execution_mask.reset(); + execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; + execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; +} + +void KeplerMemory::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); } void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5fe7489f0..fb1eecbba 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -73,6 +73,8 @@ public: } regs{}; private: + void ConsumeSinkImpl() override; + Core::System& system; Upload::State upload_state; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9b182b653..ae9da6290 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,6 +4,8 @@ #include <cstring> #include <optional> #include "common/assert.h" +#include "common/scope_exit.h" +#include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" #include "video_core/dirty_flags.h" @@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); + execution_mask.reset(); + for (size_t i = 0; i < execution_mask.size(); i++) { + execution_mask[i] = IsMethodExecutable(static_cast<u32>(i)); + } } Maxwell3D::~Maxwell3D() = default; @@ -121,6 +127,72 @@ void Maxwell3D::InitializeRegisterDefaults() { shadow_state = regs; } +bool Maxwell3D::IsMethodExecutable(u32 method) { + if (method >= MacroRegistersStart) { + return true; + } + switch (method) { + case MAXWELL3D_REG_INDEX(draw.end): + case MAXWELL3D_REG_INDEX(draw.begin): + case MAXWELL3D_REG_INDEX(vertex_buffer.first): + case MAXWELL3D_REG_INDEX(vertex_buffer.count): + case MAXWELL3D_REG_INDEX(index_buffer.first): + case MAXWELL3D_REG_INDEX(index_buffer.count): + case MAXWELL3D_REG_INDEX(draw_inline_index): + case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer32_first): + case MAXWELL3D_REG_INDEX(index_buffer16_first): + case MAXWELL3D_REG_INDEX(index_buffer8_first): + case MAXWELL3D_REG_INDEX(inline_index_2x16.even): + case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): + case MAXWELL3D_REG_INDEX(vertex_array_instance_first): + case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): + case MAXWELL3D_REG_INDEX(wait_for_idle): + case MAXWELL3D_REG_INDEX(shadow_ram_control): + case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): + case MAXWELL3D_REG_INDEX(load_mme.instruction): + case MAXWELL3D_REG_INDEX(load_mme.start_address): + case MAXWELL3D_REG_INDEX(falcon[4]): + case MAXWELL3D_REG_INDEX(const_buffer.buffer): + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14: + case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: + case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config): + case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): + case MAXWELL3D_REG_INDEX(topology_override): + case MAXWELL3D_REG_INDEX(clear_surface): + case MAXWELL3D_REG_INDEX(report_semaphore.query): + case MAXWELL3D_REG_INDEX(render_enable.mode): + case MAXWELL3D_REG_INDEX(clear_report_value): + case MAXWELL3D_REG_INDEX(sync_info): + case MAXWELL3D_REG_INDEX(launch_dma): + case MAXWELL3D_REG_INDEX(inline_data): + case MAXWELL3D_REG_INDEX(fragment_barrier): + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return true; + default: + return false; + } +} + void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. @@ -130,12 +202,70 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool } macro_params.insert(macro_params.end(), base_start, base_start + amount); + for (size_t i = 0; i < amount; i++) { + macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); + } + macro_segments.emplace_back(current_dma_segment, amount); + current_macro_dirty |= current_dirty; + current_dirty = false; // Call the macro when there are no more parameters in the command buffer if (is_last_call) { + ConsumeSink(); CallMacroMethod(executing_macro, macro_params); macro_params.clear(); + macro_addresses.clear(); + macro_segments.clear(); + current_macro_dirty = false; + } +} + +void Maxwell3D::RefreshParametersImpl() { + size_t current_index = 0; + for (auto& segment : macro_segments) { + if (segment.first == 0) { + current_index += segment.second; + continue; + } + memory_manager.ReadBlock(segment.first, ¯o_params[current_index], + sizeof(u32) * segment.second); + current_index += segment.second; + } +} + +u32 Maxwell3D::GetMaxCurrentVertices() { + u32 num_vertices = 0; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + const auto& array = regs.vertex_streams[index]; + if (array.enable == 0) { + continue; + } + const auto& attribute = regs.vertex_attrib_format[index]; + if (attribute.constant) { + num_vertices = std::max(num_vertices, 1U); + continue; + } + const auto& limit = regs.vertex_stream_limits[index]; + const GPUVAddr gpu_addr_begin = array.Address(); + const GPUVAddr gpu_addr_end = limit.Address() + 1; + const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); + num_vertices = std::max( + num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); } + return num_vertices; +} + +size_t Maxwell3D::EstimateIndexBufferSize() { + GPUVAddr start_address = regs.index_buffer.StartAddress(); + GPUVAddr end_address = regs.index_buffer.EndAddress(); + constexpr std::array<size_t, 4> max_sizes = { + std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), + std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; + const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); + return std::min<size_t>( + memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / + byte_size, + static_cast<size_t>(end_address - start_address)); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { @@ -152,6 +282,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { return argument; } +void Maxwell3D::ConsumeSinkImpl() { + SCOPE_EXIT({ method_sink.clear(); }); + const auto control = shadow_state.shadow_ram_control; + if (control == Regs::ShadowRamControl::Track || + control == Regs::ShadowRamControl::TrackWithFilter) { + + for (auto [method, value] : method_sink) { + shadow_state.reg_array[method] = value; + ProcessDirtyRegisters(method, value); + } + return; + } + if (control == Regs::ShadowRamControl::Replay) { + for (auto [method, value] : method_sink) { + ProcessDirtyRegisters(method, shadow_state.reg_array[method]); + } + return; + } + for (auto [method, value] : method_sink) { + ProcessDirtyRegisters(method, value); + } +} + void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { if (regs.reg_array[method] == argument) { return; @@ -263,7 +416,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); - ProcessMethodCall(method, argument, method_argument, is_last_call); } @@ -294,9 +446,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: ProcessCBMultiData(base_start, amount); break; - case MAXWELL3D_REG_INDEX(inline_data): + case MAXWELL3D_REG_INDEX(inline_data): { + ASSERT(methods_pending == amount); upload_state.ProcessData(base_start, amount); return; + } default: for (u32 i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - i <= 1); @@ -314,7 +468,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) { } void Maxwell3D::ProcessFirmwareCall4() { - LOG_WARNING(HW_GPU, "(STUBBED) called"); + LOG_DEBUG(HW_GPU, "(STUBBED) called"); // Firmware call 4 is a blob that changes some registers depending on its parameters. // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. @@ -332,11 +486,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { } void Maxwell3D::ProcessQueryGet() { - // TODO(Subv): Support the other query units. - if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { - LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); - } - switch (regs.report_semaphore.query.operation) { case Regs::ReportSemaphore::Operation::Release: if (regs.report_semaphore.query.short_query != 0) { @@ -389,7 +538,11 @@ void Maxwell3D::ProcessQueryCondition() { case Regs::RenderEnable::Override::NeverRender: execute_on = false; break; - case Regs::RenderEnable::Override::UseRenderEnable: + case Regs::RenderEnable::Override::UseRenderEnable: { + if (rasterizer->AccelerateConditionalRendering()) { + execute_on = true; + return; + } switch (regs.render_enable.mode) { case Regs::RenderEnable::Mode::True: { execute_on = true; @@ -427,6 +580,7 @@ void Maxwell3D::ProcessQueryCondition() { } break; } + } } void Maxwell3D::ProcessCounterReset() { @@ -463,7 +617,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { } void Maxwell3D::ProcessCBBind(size_t stage_index) { - // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. + // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader + // stage. const auto& bind_data = regs.bind_groups[stage_index]; auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot]; buffer.enabled = bind_data.valid.Value() != 0; @@ -490,7 +645,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { const GPUVAddr address{buffer_address + regs.const_buffer.offset}; const size_t copy_size = amount * sizeof(u32); - memory_manager.WriteBlock(address, start_base, copy_size); + memory_manager.WriteBlockCached(address, start_base, copy_size); // Increment the current buffer position. regs.const_buffer.offset += static_cast<u32>(copy_size); @@ -524,4 +679,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } +void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset, + HLEReplacementAttributeType name) { + const u64 key = (static_cast<u64>(bank) << 32) | offset; + replace_table.emplace(key, name); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 22b904319..c89969bb4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -272,6 +272,7 @@ public: }; union { + u32 raw; BitField<0, 1, Mode> mode; BitField<4, 8, u32> pad; }; @@ -1217,10 +1218,12 @@ public: struct Window { union { + u32 raw_x; BitField<0, 16, u32> x_min; BitField<16, 16, u32> x_max; }; union { + u32 raw_y; BitField<0, 16, u32> y_min; BitField<16, 16, u32> y_max; }; @@ -1596,6 +1599,20 @@ public: }; static_assert(sizeof(TIRModulationCoeff) == 0x4); + struct DrawTexture { + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 dx_du; + s64 dy_dv; + u32 src_sampler; + u32 src_texture; + s32 src_x0; + s32 src_y0; + }; + static_assert(sizeof(DrawTexture) == 0x30); + struct ReduceColorThreshold { union { BitField<0, 8, u32> all_hit_once; @@ -2708,7 +2725,7 @@ public: u32 post_z_pixel_imask; ///< 0x0F1C INSERT_PADDING_BYTES_NOINIT(0x20); ConstantColorRendering const_color_rendering; ///< 0x0F40 - s32 stencil_back_ref; ///< 0x0F54 + u32 stencil_back_ref; ///< 0x0F54 u32 stencil_back_mask; ///< 0x0F58 u32 stencil_back_func_mask; ///< 0x0F5C INSERT_PADDING_BYTES_NOINIT(0x14); @@ -2748,7 +2765,7 @@ public: u32 reserved_sw_method2; ///< 0x102C std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030 std::array<u32, 15> spare_nop; ///< 0x1044 - INSERT_PADDING_BYTES_NOINIT(0x30); + DrawTexture draw_texture; ///< 0x1080 std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0 ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0 @@ -2832,9 +2849,9 @@ public: Blend blend; ///< 0x133C u32 stencil_enable; ///< 0x1380 StencilOp stencil_front_op; ///< 0x1384 - s32 stencil_front_ref; ///< 0x1394 - s32 stencil_front_func_mask; ///< 0x1398 - s32 stencil_front_mask; ///< 0x139C + u32 stencil_front_ref; ///< 0x1394 + u32 stencil_front_func_mask; ///< 0x1398 + u32 stencil_front_mask; ///< 0x139C INSERT_PADDING_BYTES_NOINIT(0x4); u32 draw_auto_start_byte_count; ///< 0x13A4 PsSaturate frag_color_clamp; ///< 0x13A8 @@ -3020,6 +3037,24 @@ public: /// Store temporary hw register values, used by some calls to restore state after a operation Regs shadow_state; + // None Engine + enum class EngineHint : u32 { + None = 0x0, + OnHLEMacro = 0x1, + }; + + EngineHint engine_state{EngineHint::None}; + + enum class HLEReplacementAttributeType : u32 { + BaseVertex = 0x0, + BaseInstance = 0x1, + DrawID = 0x2, + }; + + void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name); + + std::unordered_map<u64, HLEReplacementAttributeType> replace_table; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); @@ -3067,6 +3102,35 @@ public: std::unique_ptr<DrawManager> draw_manager; friend class DrawManager; + GPUVAddr GetMacroAddress(size_t index) const { + return macro_addresses[index]; + } + + void RefreshParameters() { + if (!current_macro_dirty) { + return; + } + RefreshParametersImpl(); + } + + bool AnyParametersDirty() const { + return current_macro_dirty; + } + + u32 GetMaxCurrentVertices(); + + size_t EstimateIndexBufferSize(); + + /// Handles a write to the CLEAR_BUFFERS register. + void ProcessClearBuffers(u32 layer_count); + + /// Handles a write to the CB_BIND register. + void ProcessCBBind(size_t stage_index); + + /// Handles a write to the CB_DATA[i] register. + void ProcessCBData(u32 value); + void ProcessCBMultiData(const u32* start_base, u32 amount); + private: void InitializeRegisterDefaults(); @@ -3076,6 +3140,8 @@ private: void ProcessDirtyRegisters(u32 method, u32 argument); + void ConsumeSinkImpl() override; + void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -3116,16 +3182,13 @@ private: /// Handles writes to syncing register. void ProcessSyncPoint(); - /// Handles a write to the CB_DATA[i] register. - void ProcessCBData(u32 value); - void ProcessCBMultiData(const u32* start_base, u32 amount); - - /// Handles a write to the CB_BIND register. - void ProcessCBBind(size_t stage_index); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional<u64> GetQueryResult(); + void RefreshParametersImpl(); + + bool IsMethodExecutable(u32 method); + Core::System& system; MemoryManager& memory_manager; @@ -3145,6 +3208,10 @@ private: Upload::State upload_state; bool execute_on{true}; + + std::vector<std::pair<GPUVAddr, size_t>> macro_segments; + std::vector<GPUVAddr> macro_addresses; + bool current_macro_dirty{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index f73d7bf0f..7762c7d96 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -21,7 +21,10 @@ namespace Tegra::Engines { using namespace Texture; MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) - : system{system_}, memory_manager{memory_manager_} {} + : system{system_}, memory_manager{memory_manager_} { + execution_mask.reset(); + execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; +} MaxwellDMA::~MaxwellDMA() = default; @@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; } +void MaxwellDMA::ConsumeSinkImpl() { + for (auto [method, value] : method_sink) { + regs.reg_array[method] = value; + } + method_sink.clear(); +} + void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); @@ -59,7 +69,7 @@ void MaxwellDMA::Launch() { if (launch.multi_line_enable) { const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; - + memory_manager.FlushCaching(); if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. CopyBlockLinearToBlockLinear(); @@ -94,6 +104,7 @@ void MaxwellDMA::Launch() { reinterpret_cast<u8*>(tmp_buffer.data()), regs.line_length_in * sizeof(u32)); } else { + memory_manager.FlushCaching(); const auto convert_linear_2_blocklinear_addr = [](u64 address) { return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | ((address & 0x180) >> 1) | ((address & 0x20) << 3); @@ -111,8 +122,8 @@ void MaxwellDMA::Launch() { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), + tmp_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -122,7 +133,7 @@ void MaxwellDMA::Launch() { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlock( + memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), tmp_buffer.data(), tmp_buffer.size()); } @@ -131,8 +142,8 @@ void MaxwellDMA::Launch() { std::vector<u8> tmp_buffer(regs.line_length_in); memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), - regs.line_length_in); + memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + regs.line_length_in); } } } @@ -194,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyPitchToBlockLinear() { @@ -246,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, regs.pitch_in); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::FastCopyBlockLinearToPitch() { @@ -277,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { regs.src_params.block_size.height, regs.src_params.block_size.depth, regs.pitch_out); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyBlockLinearToBlockLinear() { @@ -337,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.block_size.height, dst.block_size.depth, pitch); - memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::ReleaseSemaphore() { diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c88191a61..0e594fa74 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -231,6 +231,8 @@ private: void ReleaseSemaphore(); + void ConsumeSinkImpl() override; + Core::System& system; MemoryManager& memory_manager; |