diff options
Diffstat (limited to 'src/core/hw/gpu.cpp')
-rw-r--r-- | src/core/hw/gpu.cpp | 214 |
1 files changed, 138 insertions, 76 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 7471def57..3ccbc03b2 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -2,17 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> +#include <type_traits> + #include "common/color.h" #include "common/common_types.h" - -#include "core/arm/arm_interface.h" +#include "common/logging/log.h" +#include "common/vector_math.h" #include "core/settings.h" -#include "core/core.h" #include "core/memory.h" #include "core/core_timing.h" -#include "core/hle/hle.h" #include "core/hle/service/gsp_gpu.h" #include "core/hle/service/dsp_dsp.h" #include "core/hle/service/hid/hid.h" @@ -20,10 +21,17 @@ #include "core/hw/hw.h" #include "core/hw/gpu.h" +#include "core/tracer/recorder.h" + #include "video_core/command_processor.h" +#include "video_core/hwrasterizer_base.h" +#include "video_core/renderer_base.h" #include "video_core/utils.h" #include "video_core/video_core.h" +#include "video_core/debug_utils/debug_utils.h" + + namespace GPU { Regs g_regs; @@ -53,6 +61,29 @@ inline void Read(T &var, const u32 raw_addr) { var = g_regs[addr / 4]; } +static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { + switch (input_format) { + case Regs::PixelFormat::RGBA8: + return Color::DecodeRGBA8(src_pixel); + + case Regs::PixelFormat::RGB8: + return Color::DecodeRGB8(src_pixel); + + case Regs::PixelFormat::RGB565: + return Color::DecodeRGB565(src_pixel); + + case Regs::PixelFormat::RGB5A1: + return Color::DecodeRGB5A1(src_pixel); + + case Regs::PixelFormat::RGBA4: + return Color::DecodeRGBA4(src_pixel); + + default: + LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", input_format); + return {0, 0, 0, 0}; + } +} + template <typename T> inline void Write(u32 addr, const T data) { addr -= HW::VADDR_GPU; @@ -75,39 +106,43 @@ inline void Write(u32 addr, const T data) { const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); auto& config = g_regs.memory_fill_config[is_second_filler]; - if (config.address_start && config.trigger) { - u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); - u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); - - if (config.fill_24bit) { - // fill with 24-bit values - for (u8* ptr = start; ptr < end; ptr += 3) { - ptr[0] = config.value_24bit_r; - ptr[1] = config.value_24bit_g; - ptr[2] = config.value_24bit_b; + if (config.trigger) { + if (config.address_start) { // Some games pass invalid values here + u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); + u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); + + if (config.fill_24bit) { + // fill with 24-bit values + for (u8* ptr = start; ptr < end; ptr += 3) { + ptr[0] = config.value_24bit_r; + ptr[1] = config.value_24bit_g; + ptr[2] = config.value_24bit_b; + } + } else if (config.fill_32bit) { + // fill with 32-bit values + for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) + *ptr = config.value_32bit; + } else { + // fill with 16-bit values + for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) + *ptr = config.value_16bit; } - } else if (config.fill_32bit) { - // fill with 32-bit values - for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) - *ptr = config.value_32bit; - } else { - // fill with 16-bit values - for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) - *ptr = config.value_16bit; - } - LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); + LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); - config.trigger = 0; - config.finished = 1; + if (!is_second_filler) { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); + } else { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + } - if (!is_second_filler) { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); - } else { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); } - VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); + // Reset "trigger" flag and set the "finish" flag + // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. + config.trigger = 0; + config.finished = 1; } break; } @@ -116,6 +151,10 @@ inline void Write(u32 addr, const T data) { { const auto& config = g_regs.display_transfer_config; if (config.trigger & 1) { + + if (Pica::g_debug_context) + Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); + u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); @@ -125,11 +164,18 @@ inline void Write(u32 addr, const T data) { break; } - unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1; - unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1; + if (config.output_tiled && + (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { + LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); + UNIMPLEMENTED(); + break; + } - u32 output_width = config.output_width / horizontal_scale; - u32 output_height = config.output_height / vertical_scale; + bool horizontal_scale = config.scaling != config.NoScale; + bool vertical_scale = config.scaling == config.ScaleXY; + + u32 output_width = config.output_width >> horizontal_scale; + u32 output_height = config.output_height >> vertical_scale; u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); @@ -153,16 +199,14 @@ inline void Write(u32 addr, const T data) { break; } - // TODO(Subv): Implement the box filter when scaling is enabled - // right now we're just skipping the extra pixels. for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { - Math::Vec4<u8> src_color = { 0, 0, 0, 0 }; + Math::Vec4<u8> src_color; // Calculate the [x,y] position of the input image // based on the current output position and the scale - u32 input_x = x * horizontal_scale; - u32 input_y = y * vertical_scale; + u32 input_x = x << horizontal_scale; + u32 input_y = y << vertical_scale; if (config.flip_vertically) { // Flip the y value of the output data, @@ -177,46 +221,49 @@ inline void Write(u32 addr, const T data) { u32 dst_offset; if (config.output_tiled) { - // Interpret the input as linear and the output as tiled - u32 coarse_y = y & ~7; - u32 stride = output_width * dst_bytes_per_pixel; - - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; - dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; + if (!config.dont_swizzle) { + // Interpret the input as linear and the output as tiled + u32 coarse_y = y & ~7; + u32 stride = output_width * dst_bytes_per_pixel; + + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; + } else { + // Both input and output are linear + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + } } else { - // Interpret the input as tiled and the output as linear - u32 coarse_y = input_y & ~7; - u32 stride = config.input_width * src_bytes_per_pixel; - - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; - dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + if (!config.dont_swizzle) { + // Interpret the input as tiled and the output as linear + u32 coarse_y = input_y & ~7; + u32 stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; + dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + } else { + // Both input and output are tiled + u32 out_coarse_y = y & ~7; + u32 out_stride = output_width * dst_bytes_per_pixel; + + u32 in_coarse_y = input_y & ~7; + u32 in_stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; + } } const u8* src_pixel = src_pointer + src_offset; - switch (config.input_format) { - case Regs::PixelFormat::RGBA8: - src_color = Color::DecodeRGBA8(src_pixel); - break; - - case Regs::PixelFormat::RGB8: - src_color = Color::DecodeRGB8(src_pixel); - break; - - case Regs::PixelFormat::RGB565: - src_color = Color::DecodeRGB565(src_pixel); - break; - - case Regs::PixelFormat::RGB5A1: - src_color = Color::DecodeRGB5A1(src_pixel); - break; - - case Regs::PixelFormat::RGBA4: - src_color = Color::DecodeRGBA4(src_pixel); - break; - - default: - LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value()); - break; + src_color = DecodePixel(config.input_format, src_pixel); + if (config.scaling == config.ScaleX) { + Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); + src_color = ((src_color + pixel) / 2).Cast<u8>(); + } else if (config.scaling == config.ScaleXY) { + Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); + Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); + Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); + src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); } u8* dst_pixel = dst_pointer + dst_offset; @@ -254,6 +301,7 @@ inline void Write(u32 addr, const T data) { config.GetPhysicalOutputAddress(), output_width, output_height, config.output_format.Value(), config.flags); + g_regs.display_transfer_config.trigger = 0; GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); @@ -268,7 +316,14 @@ inline void Write(u32 addr, const T data) { if (config.trigger & 1) { u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); + + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); + } + Pica::CommandProcessor::ProcessCommandList(buffer, config.size); + + g_regs.command_processor_config.trigger = 0; } break; } @@ -276,6 +331,13 @@ inline void Write(u32 addr, const T data) { default: break; } + + // Notify tracer about the register write + // This is happening *after* handling the write to make sure we properly catch all memory reads. + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + // addr + GPU VBase - IO VBase + IO PBase + Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); + } } // Explicitly instantiate template functions because we aren't defining this in the header: |