diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/draw_manager.h | 1 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 74 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 3 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 17 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 55 | ||||
-rw-r--r-- | src/video_core/engines/puller.cpp | 13 |
6 files changed, 58 insertions, 105 deletions
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 7c22c49f1..18d959143 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -46,6 +46,7 @@ public: }; struct IndirectParams { + bool is_byte_count; bool is_indexed; bool include_count; GPUVAddr count_start_address; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 06e349e43..32d767d85 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -20,8 +20,6 @@ namespace Tegra::Engines { -using VideoCore::QueryType; - /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { } void Maxwell3D::ProcessQueryGet() { + VideoCommon::QueryPropertiesFlags flags{}; + if (regs.report_semaphore.query.short_query == 0) { + flags |= VideoCommon::QueryPropertiesFlags::HasTimeout; + } + const GPUVAddr sequence_address{regs.report_semaphore.Address()}; + const VideoCommon::QueryType query_type = + static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value()); + const u32 payload = regs.report_semaphore.payload; + const u32 subreport = regs.report_semaphore.query.sub_report; switch (regs.report_semaphore.query.operation) { case Regs::ReportSemaphore::Operation::Release: if (regs.report_semaphore.query.short_query != 0) { - const GPUVAddr sequence_address{regs.report_semaphore.Address()}; - const u32 payload = regs.report_semaphore.payload; - std::function<void()> operation([this, sequence_address, payload] { - memory_manager.Write<u32>(sequence_address, payload); - }); - rasterizer->SignalFence(std::move(operation)); - } else { - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - const GPUVAddr sequence_address{regs.report_semaphore.Address()}; - const u32 payload = regs.report_semaphore.payload; - [this, sequence_address, payload] { - memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); - memory_manager.Write<u64>(sequence_address, payload); - }(); + flags |= VideoCommon::QueryPropertiesFlags::IsAFence; } + rasterizer->Query(sequence_address, query_type, flags, payload, subreport); break; case Regs::ReportSemaphore::Operation::Acquire: // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that @@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() { UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); break; case Regs::ReportSemaphore::Operation::ReportOnly: - if (const std::optional<u64> result = GetQueryResult()) { - // If the query returns an empty optional it means it's cached and deferred. - // In this case we have a non-empty result, so we stamp it immediately. - StampQueryResult(*result, regs.report_semaphore.query.short_query == 0); - } + rasterizer->Query(sequence_address, query_type, flags, payload, subreport); break; case Regs::ReportSemaphore::Operation::Trap: UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); @@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() { } void Maxwell3D::ProcessQueryCondition() { + if (rasterizer->AccelerateConditionalRendering()) { + execute_on = true; + return; + } const GPUVAddr condition_address{regs.render_enable.Address()}; switch (regs.render_enable_override) { case Regs::RenderEnable::Override::AlwaysRender: @@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() { execute_on = false; break; case Regs::RenderEnable::Override::UseRenderEnable: { - if (rasterizer->AccelerateConditionalRendering()) { - execute_on = true; - return; - } switch (regs.render_enable.mode) { case Regs::RenderEnable::Mode::True: { execute_on = true; @@ -598,15 +586,9 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { -#if ANDROID - if (!Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - return; - } -#endif switch (regs.clear_report_value) { case Regs::ClearReport::ZPassPixelCount: - rasterizer->ResetCounter(QueryType::SamplesPassed); + rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); @@ -620,28 +602,6 @@ void Maxwell3D::ProcessSyncPoint() { rasterizer->SignalSyncPoint(sync_point); } -std::optional<u64> Maxwell3D::GetQueryResult() { - switch (regs.report_semaphore.query.report) { - case Regs::ReportSemaphore::Report::Payload: - return regs.report_semaphore.payload; - case Regs::ReportSemaphore::Report::ZPassPixelCount64: -#if ANDROID - if (!Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - return 120; - } -#endif - // Deferred. - rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, - system.GPU().GetTicks()); - return std::nullopt; - default: - LOG_DEBUG(HW_GPU, "Unimplemented query report type {}", - regs.report_semaphore.query.report.Value()); - return 1; - } -} - void Maxwell3D::ProcessCBBind(size_t stage_index) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader // stage. diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 6c19354e1..17faacc37 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3182,9 +3182,6 @@ private: /// Handles writes to syncing register. void ProcessSyncPoint(); - /// Returns a query's value or an empty object if the value will be deferred through a cache. - std::optional<u64> GetQueryResult(); - void RefreshParametersImpl(); bool IsMethodExecutable(u32 method); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index da8eab7ee..422d4d859 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -109,10 +109,11 @@ void MaxwellDMA::Launch() { const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, + regs.remap_const.remap_consta_value); read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); - std::ranges::fill(span, regs.remap_consta_value); + std::ranges::fill(span, regs.remap_const.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(read_buffer.data()), regs.line_length_in * sizeof(u32)); @@ -361,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() { const auto type = regs.launch_dma.semaphore_type; const GPUVAddr address = regs.semaphore.address; const u32 payload = regs.semaphore.payload; + VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence}; switch (type) { case LaunchDMA::SemaphoreType::NONE: break; case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { - std::function<void()> operation( - [this, address, payload] { memory_manager.Write<u32>(address, payload); }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0); break; } case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { - std::function<void()> operation([this, address, payload] { - memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); - memory_manager.Write<u64>(address, payload); - }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->Query(address, VideoCommon::QueryType::Payload, + flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); break; } default: diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 69e26cb32..1a43e24b6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -214,14 +214,15 @@ public: NO_WRITE = 6, }; - PackedGPUVAddr address; + u32 remap_consta_value; + u32 remap_constb_value; union { + BitField<0, 12, u32> dst_components_raw; BitField<0, 3, Swizzle> dst_x; BitField<4, 3, Swizzle> dst_y; BitField<8, 3, Swizzle> dst_z; BitField<12, 3, Swizzle> dst_w; - BitField<0, 12, u32> dst_components_raw; BitField<16, 2, u32> component_size_minus_one; BitField<20, 2, u32> num_src_components_minus_one; BitField<24, 2, u32> num_dst_components_minus_one; @@ -274,55 +275,57 @@ private: struct Regs { union { struct { - u32 reserved[0x40]; + INSERT_PADDING_BYTES_NOINIT(0x100); u32 nop; - u32 reserved01[0xf]; + INSERT_PADDING_BYTES_NOINIT(0x3C); u32 pm_trigger; - u32 reserved02[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); Semaphore semaphore; - u32 reserved03[0x2]; + INSERT_PADDING_BYTES_NOINIT(0x8); RenderEnable render_enable; PhysMode src_phys_mode; PhysMode dst_phys_mode; - u32 reserved04[0x26]; + INSERT_PADDING_BYTES_NOINIT(0x98); LaunchDMA launch_dma; - u32 reserved05[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); PackedGPUVAddr offset_in; PackedGPUVAddr offset_out; s32 pitch_in; s32 pitch_out; u32 line_length_in; u32 line_count; - u32 reserved06[0xb6]; - u32 remap_consta_value; - u32 remap_constb_value; + INSERT_PADDING_BYTES_NOINIT(0x2E0); RemapConst remap_const; DMA::Parameters dst_params; - u32 reserved07[0x1]; + INSERT_PADDING_BYTES_NOINIT(0x4); DMA::Parameters src_params; - u32 reserved08[0x275]; + INSERT_PADDING_BYTES_NOINIT(0x9D4); u32 pm_trigger_end; - u32 reserved09[0x3ba]; + INSERT_PADDING_BYTES_NOINIT(0xEE8); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; + static_assert(sizeof(Regs) == NUM_REGS * 4); #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ + static_assert(offsetof(MaxwellDMA::Regs, field_name) == position, \ "Field " #field_name " has invalid position") - ASSERT_REG_POSITION(launch_dma, 0xC0); - ASSERT_REG_POSITION(offset_in, 0x100); - ASSERT_REG_POSITION(offset_out, 0x102); - ASSERT_REG_POSITION(pitch_in, 0x104); - ASSERT_REG_POSITION(pitch_out, 0x105); - ASSERT_REG_POSITION(line_length_in, 0x106); - ASSERT_REG_POSITION(line_count, 0x107); - ASSERT_REG_POSITION(remap_const, 0x1C0); - ASSERT_REG_POSITION(dst_params, 0x1C3); - ASSERT_REG_POSITION(src_params, 0x1CA); - + ASSERT_REG_POSITION(semaphore, 0x240); + ASSERT_REG_POSITION(render_enable, 0x254); + ASSERT_REG_POSITION(src_phys_mode, 0x260); + ASSERT_REG_POSITION(launch_dma, 0x300); + ASSERT_REG_POSITION(offset_in, 0x400); + ASSERT_REG_POSITION(offset_out, 0x408); + ASSERT_REG_POSITION(pitch_in, 0x410); + ASSERT_REG_POSITION(pitch_out, 0x414); + ASSERT_REG_POSITION(line_length_in, 0x418); + ASSERT_REG_POSITION(line_count, 0x41C); + ASSERT_REG_POSITION(remap_const, 0x700); + ASSERT_REG_POSITION(dst_params, 0x70C); + ASSERT_REG_POSITION(src_params, 0x728); + ASSERT_REG_POSITION(pm_trigger_end, 0x1114); #undef ASSERT_REG_POSITION }; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 6de2543b7..8dd34c04a 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -82,10 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; - [this, sequence_address, payload] { - memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); - memory_manager.Write<u64>(sequence_address, payload); - }(); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); } else { do { const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; @@ -120,10 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { void Puller::ProcessSemaphoreRelease() { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_release; - std::function<void()> operation([this, sequence_address, payload] { - memory_manager.Write<u32>(sequence_address, payload); - }); - rasterizer->SignalFence(std::move(operation)); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); } void Puller::ProcessSemaphoreAcquire() { @@ -132,7 +128,6 @@ void Puller::ProcessSemaphoreAcquire() { while (word != value) { regs.acquire_active = true; regs.acquire_value = value; - std::this_thread::sleep_for(std::chrono::milliseconds(1)); rasterizer->ReleaseFences(); word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); // TODO(kemathe73) figure out how to do the acquire_timeout |