From 98e3274935c1de94f3e54c145b20d3f08b525647 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Tue, 13 Jan 2015 23:52:59 -0200 Subject: GPU: Fire GPU interrupts at the correct places. PDC0 and PDC1 are both VBlank interrupts. PDC0 was being treated as a HBlank interrupt and fired many more times than it should. They now both fire together at 60 Hz. This puzzlingly *improves* apparent framerate on many applications. A few other interrupts were being fired inside the GSP command processing instead of on the actual GPU register writes, so they were moved there, which should cover direct writes tho those registers not going through the GX command queue. --- src/core/hle/service/gsp_gpu.cpp | 6 ------ src/core/hw/gpu.cpp | 33 ++++++++++++++++++--------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 2b115240f..9504ab5bb 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -269,8 +269,6 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3); WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2); WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2); - - SignalInterrupt(InterruptId::PSC0); break; } @@ -284,10 +282,6 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); - // TODO(bunnei): Determine if these interrupts should be signalled here. - SignalInterrupt(InterruptId::PSC1); - SignalInterrupt(InterruptId::PPF); - // Update framebuffer information if requested for (int screen_id = 0; screen_id < 2; ++screen_id) { FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 3b730a0de..ad39fdc49 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -27,8 +27,6 @@ Regs g_regs; bool g_skip_frame = false; ///< True if the current frame was skipped static u64 frame_ticks = 0; ///< 268MHz / gpu_refresh_rate frames per second -static u64 line_ticks = 0; ///< Number of ticks for a screen line -static u32 cur_line = 0; ///< Current screen line static u64 last_update_tick = 0; ///< CPU ticl count from last GPU update static u64 frame_count = 0; ///< Number of frames drawn static bool last_skip_frame = false; ///< True if the last frame was skipped @@ -79,6 +77,12 @@ inline void Write(u32 addr, const T data) { *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); + + if (!is_second_filler) { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); + } else { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + } } break; } @@ -152,6 +156,8 @@ inline void Write(u32 addr, const T data) { config.GetPhysicalInputAddress(), (u32)config.input_width, (u32)config.input_height, config.GetPhysicalOutputAddress(), (u32)config.output_width, (u32)config.output_height, config.output_format.Value()); + + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); } break; } @@ -193,20 +199,14 @@ void Update() { // blank, we need to simulate it. Based on testing, it seems that retail applications work more // accurately when this is signalled between thread switches. - if (HLE::g_reschedule) { - u64 current_ticks = Core::g_app_core->GetTicks(); - u32 num_lines = static_cast((current_ticks - last_update_tick) / line_ticks); + u64 current_ticks = Core::g_app_core->GetTicks(); - // Synchronize line... - if (num_lines > 0) { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); - cur_line += num_lines; - last_update_tick += (num_lines * line_ticks); - } + + if (HLE::g_reschedule) { // Synchronize frame... - if (cur_line >= framebuffer_top.height) { - cur_line = 0; + if ((current_ticks - last_update_tick) >= frame_ticks) { + last_update_tick += frame_ticks; frame_count++; last_skip_frame = g_skip_frame; g_skip_frame = (frame_count & Settings::values.frame_skip) != 0; @@ -223,6 +223,11 @@ void Update() { } // Signal to GSP that GPU interrupt has occurred + // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub + // screen, or if both use the same interrupts and these two instead determine the + // beginning and end of the VBlank period. If needed, split the interrupt firing into + // two different intervals. + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but @@ -264,8 +269,6 @@ void Init() { framebuffer_sub.active_fb = 0; frame_ticks = 268123480 / Settings::values.gpu_refresh_rate; - line_ticks = (GPU::frame_ticks / framebuffer_top.height); - cur_line = 0; last_update_tick = Core::g_app_core->GetTicks(); last_skip_frame = false; g_skip_frame = false; -- cgit v1.2.3 From 5961a2852dc2b603a896afd9b38b0ded26503849 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Tue, 13 Jan 2015 23:55:56 -0200 Subject: GSP: Update framebuffer info on all interrupts Hardware testing determined that the GSP processes shared memory framebuffer update info even when no memory transfer or filling GX commands are used. They are now updated on every interrupt, which isn't confirmed correct but matches hardware behaviour more closely. This also reverts the hack introduced in #404. It made a few games behave better, but I believe it's incorrect and also breaks other games. --- src/core/hle/service/gsp_gpu.cpp | 25 +++++++++++----------- src/video_core/renderer_opengl/renderer_opengl.cpp | 4 +--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 9504ab5bb..00a941658 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -218,6 +218,19 @@ void SignalInterrupt(InterruptId interrupt_id) { interrupt_relay_queue->slot[next] = interrupt_id; interrupt_relay_queue->error_code = 0x0; // No error + + // Update framebuffer information if requested + // TODO(yuriks): Confirm where this code should be called. It is definitely updated without + // executing any GSP commands, only waiting on the event. + for (int screen_id = 0; screen_id < 2; ++screen_id) { + FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); + + if (info->is_dirty) { + SetBufferSwap(screen_id, info->framebuffer_info[info->index]); + } + + info->is_dirty = false; + } } Kernel::SignalEvent(g_interrupt_event); } @@ -281,18 +294,6 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); - - // Update framebuffer information if requested - for (int screen_id = 0; screen_id < 2; ++screen_id) { - FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); - - if (info->is_dirty) { - SetBufferSwap(screen_id, info->framebuffer_info[info->index]); - info->framebuffer_info->active_fb = info->framebuffer_info->active_fb ^ 1; - } - - info->is_dirty = false; - } break; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 29d220e8d..aa47bd616 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -88,10 +88,8 @@ void RendererOpenGL::SwapBuffers() { void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, const TextureInfo& texture) { - // TODO: Why are active_fb and the valid framebuffer flipped compared to 3dbrew documentation - // and GSP definitions? const VAddr framebuffer_vaddr = Memory::PhysicalToVirtualAddress( - framebuffer.active_fb == 0 ? framebuffer.address_left2 : framebuffer.address_left1); + framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2); LOG_TRACE(Render_OpenGL, "0x%08x bytes from 0x%08x(%dx%d), fmt %x", framebuffer.stride * framebuffer.height, -- cgit v1.2.3 From e29dd76e12d31ed3927d4860c0bcc3d808443932 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Tue, 13 Jan 2015 23:57:45 -0200 Subject: GPU: Correct wrong default framebuffer address for sub-screen. It appears this is a mistake, since the sub-screen has no right framebuffer. --- src/core/hw/gpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index ad39fdc49..49136b7e1 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -252,8 +252,8 @@ void Init() { framebuffer_top.address_right1 = 0x18273000; framebuffer_top.address_right2 = 0x182B9800; framebuffer_sub.address_left1 = 0x1848F000; - //framebuffer_sub.address_left2 = unknown; - framebuffer_sub.address_right1 = 0x184C7800; + framebuffer_sub.address_left2 = 0x184C7800; + //framebuffer_sub.address_right1 = unknown; //framebuffer_sub.address_right2 = unknown; framebuffer_top.width = 240; -- cgit v1.2.3 From 9e084826b8764853e363593ba8793e7b17eaa4f8 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Wed, 14 Jan 2015 01:19:08 -0200 Subject: GPU: Do periodic VBlank updates using CoreTiming --- src/core/hw/gpu.cpp | 91 ++++++++++++++++++++++++++--------------------------- src/core/hw/gpu.h | 3 -- src/core/hw/hw.cpp | 1 - 3 files changed, 44 insertions(+), 51 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 49136b7e1..256e11c37 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -9,6 +9,7 @@ #include "core/settings.h" #include "core/core.h" #include "core/mem_map.h" +#include "core/core_timing.h" #include "core/hle/hle.h" #include "core/hle/service/gsp_gpu.h" @@ -24,12 +25,17 @@ namespace GPU { Regs g_regs; -bool g_skip_frame = false; ///< True if the current frame was skipped +/// True if the current frame was skipped +bool g_skip_frame = false; -static u64 frame_ticks = 0; ///< 268MHz / gpu_refresh_rate frames per second -static u64 last_update_tick = 0; ///< CPU ticl count from last GPU update -static u64 frame_count = 0; ///< Number of frames drawn -static bool last_skip_frame = false; ///< True if the last frame was skipped +/// 268MHz / gpu_refresh_rate frames per second +static u64 frame_ticks; +/// Event id for CoreTiming +static int vblank_event; +/// Total number of frames drawn +static u64 frame_count; +/// True if the last frame was skipped +static bool last_skip_frame = false; template inline void Read(T &var, const u32 raw_addr) { @@ -192,50 +198,39 @@ template void Write(u32 addr, const u16 data); template void Write(u32 addr, const u8 data); /// Update hardware -void Update() { +static void VBlankCallback(u64 userdata, int cycles_late) { auto& framebuffer_top = g_regs.framebuffer_config[0]; - // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical - // blank, we need to simulate it. Based on testing, it seems that retail applications work more - // accurately when this is signalled between thread switches. - - u64 current_ticks = Core::g_app_core->GetTicks(); - - - if (HLE::g_reschedule) { - - // Synchronize frame... - if ((current_ticks - last_update_tick) >= frame_ticks) { - last_update_tick += frame_ticks; - frame_count++; - last_skip_frame = g_skip_frame; - g_skip_frame = (frame_count & Settings::values.frame_skip) != 0; - - // Swap buffers based on the frameskip mode, which is a little bit tricky. When - // a frame is being skipped, nothing is being rendered to the internal framebuffer(s). - // So, we should only swap frames if the last frame was rendered. The rules are: - // - If frameskip == 0 (disabled), always swap buffers - // - If frameskip == 1, swap buffers every other frame (starting from the first frame) - // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) - if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || - Settings::values.frame_skip == 0) { - VideoCore::g_renderer->SwapBuffers(); - } - - // Signal to GSP that GPU interrupt has occurred - // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub - // screen, or if both use the same interrupts and these two instead determine the - // beginning and end of the VBlank period. If needed, split the interrupt firing into - // two different intervals. - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); - - // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but - // until we can emulate DSP interrupts, this is probably the only reasonable place to do - // this. Certain games expect this to be periodically signaled. - DSP_DSP::SignalInterrupt(); - } + frame_count++; + last_skip_frame = g_skip_frame; + g_skip_frame = (frame_count & Settings::values.frame_skip) != 0; + + // Swap buffers based on the frameskip mode, which is a little bit tricky. When + // a frame is being skipped, nothing is being rendered to the internal framebuffer(s). + // So, we should only swap frames if the last frame was rendered. The rules are: + // - If frameskip == 0 (disabled), always swap buffers + // - If frameskip == 1, swap buffers every other frame (starting from the first frame) + // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) + if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || + Settings::values.frame_skip == 0) { + VideoCore::g_renderer->SwapBuffers(); } + + // Signal to GSP that GPU interrupt has occurred + // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub + // screen, or if both use the same interrupts and these two instead determine the + // beginning and end of the VBlank period. If needed, split the interrupt firing into + // two different intervals. + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); + + // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but + // until we can emulate DSP interrupts, this is probably the only reasonable place to do + // this. Certain games expect this to be periodically signaled. + DSP_DSP::SignalInterrupt(); + + // Reschedule recurrent event + CoreTiming::ScheduleEvent(frame_ticks - cycles_late, vblank_event); } /// Initialize hardware @@ -269,10 +264,12 @@ void Init() { framebuffer_sub.active_fb = 0; frame_ticks = 268123480 / Settings::values.gpu_refresh_rate; - last_update_tick = Core::g_app_core->GetTicks(); last_skip_frame = false; g_skip_frame = false; + vblank_event = CoreTiming::RegisterEvent("GPU::VBlankCallback", VBlankCallback); + CoreTiming::ScheduleEvent(frame_ticks, vblank_event); + LOG_DEBUG(HW_GPU, "initialized OK"); } diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 7de055232..7c3a17ee5 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -252,9 +252,6 @@ void Read(T &var, const u32 addr); template void Write(u32 addr, const T data); -/// Update hardware -void Update(); - /// Initialize hardware void Init(); diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index 848ab5348..503200629 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp @@ -75,7 +75,6 @@ template void Write(u32 addr, const u8 data); /// Update hardware void Update() { - GPU::Update(); } /// Initialize hardware -- cgit v1.2.3 From 7630b3167210cc7f84c83af54dde23be948e3bc5 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Wed, 14 Jan 2015 03:26:27 -0200 Subject: GSP: Fix appending of interrupts to the shared memory buffer The code was previously appending the interrupt to after the end of the buffer, instead of at the end. --- src/core/hle/service/gsp_gpu.cpp | 4 ++-- src/core/hle/service/gsp_gpu.h | 25 ++++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 00a941658..4ca2b9bd0 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -210,12 +210,12 @@ void SignalInterrupt(InterruptId interrupt_id) { } for (int thread_id = 0; thread_id < 0x4; ++thread_id) { InterruptRelayQueue* interrupt_relay_queue = GetInterruptRelayQueue(thread_id); - interrupt_relay_queue->number_interrupts = interrupt_relay_queue->number_interrupts + 1; - u8 next = interrupt_relay_queue->index; next += interrupt_relay_queue->number_interrupts; next = next % 0x34; // 0x34 is the number of interrupt slots + interrupt_relay_queue->number_interrupts += 1; + interrupt_relay_queue->slot[next] = interrupt_id; interrupt_relay_queue->error_code = 0x0; // No error diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index 932b6170f..65abb194a 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h @@ -45,21 +45,16 @@ enum class CommandId : u32 { /// GSP thread interrupt relay queue struct InterruptRelayQueue { - union { - u32 hex; - - // Index of last interrupt in the queue - BitField<0,8,u32> index; - - // Number of interrupts remaining to be processed by the userland code - BitField<8,8,u32> number_interrupts; - - // Error code - zero on success, otherwise an error has occurred - BitField<16,8,u32> error_code; - }; - - u32 unk0; - u32 unk1; + // Index of last interrupt in the queue + u8 index; + // Number of interrupts remaining to be processed by the userland code + u8 number_interrupts; + // Error code - zero on success, otherwise an error has occurred + u8 error_code; + u8 padding1; + + u32 missed_PDC0; + u32 missed_PDC1; InterruptId slot[0x34]; ///< Interrupt ID slots }; -- cgit v1.2.3 From a09f71521e6a20de86e9b31451c82c3df5c301a0 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Wed, 14 Jan 2015 05:03:14 -0200 Subject: GPU: Fix buffer overrun in Display Transfers Display transfers with the horizontal downscaling flag were calculating the wrong output size, causing them to write double the amount of data intended. It is likely that this was perceived as correct due to a separate bug in calculating source indices which caused the image to be padded unless the previous bug was present. This fixes both issues, correcting flickering issues in 3dscraft, blargSnes and more (caused by the transfer overwriting the back buffer which followed) as well as potentially fixing other crashes. --- src/core/hw/gpu.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 256e11c37..58eec3005 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -100,22 +100,25 @@ inline void Write(u32 addr, const T data) { u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); + // Cheap emulation of horizontal scaling: Just skip each second pixel of the + // input framebuffer. We keep track of this in the pixel_skip variable. + unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; + + u32 output_width = config.output_width / pixel_skip; + for (u32 y = 0; y < config.output_height; ++y) { // TODO: Why does the register seem to hold twice the framebuffer width? - for (u32 x = 0; x < config.output_width; ++x) { + + for (u32 x = 0; x < output_width; ++x) { struct { int r, g, b, a; } source_color = { 0, 0, 0, 0 }; - // Cheap emulation of horizontal scaling: Just skip each second pixel of the - // input framebuffer. We keep track of this in the pixel_skip variable. - unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; - switch (config.input_format) { case Regs::PixelFormat::RGBA8: { // TODO: Most likely got the component order messed up. - u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; + u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; source_color.r = srcptr[0]; // blue source_color.g = srcptr[1]; // green source_color.b = srcptr[2]; // red @@ -143,7 +146,7 @@ inline void Write(u32 addr, const T data) { case Regs::PixelFormat::RGB8: { // TODO: Most likely got the component order messed up. - u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3; + u8* dstptr = dest_pointer + (x + y * output_width) * 3; dstptr[0] = source_color.r; // blue dstptr[1] = source_color.g; // green dstptr[2] = source_color.b; // red @@ -158,9 +161,9 @@ inline void Write(u32 addr, const T data) { } LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x", - config.output_height * config.output_width * 4, + config.output_height * output_width * 4, config.GetPhysicalInputAddress(), (u32)config.input_width, (u32)config.input_height, - config.GetPhysicalOutputAddress(), (u32)config.output_width, (u32)config.output_height, + config.GetPhysicalOutputAddress(), (u32)output_width, (u32)config.output_height, config.output_format.Value()); GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); -- cgit v1.2.3