summary | refs | log | tree | commit | diff | stats
path: root/src/core/hw/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw/gpu.cpp')
-rw-r--r-- src/core/hw/gpu.cpp 214
1 files changed, 138 insertions, 76 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7471def57..3ccbc03b2 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -2,17 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
+#include <type_traits>
+
#include "common/color.h"
#include "common/common_types.h"
-
-#include "core/arm/arm_interface.h"
+#include "common/logging/log.h"
+#include "common/vector_math.h"
#include "core/settings.h"
-#include "core/core.h"
#include "core/memory.h"
#include "core/core_timing.h"
-#include "core/hle/hle.h"
#include "core/hle/service/gsp_gpu.h"
#include "core/hle/service/dsp_dsp.h"
#include "core/hle/service/hid/hid.h"
@@ -20,10 +21,17 @@
#include "core/hw/hw.h"
#include "core/hw/gpu.h"
+#include "core/tracer/recorder.h"
+
#include "video_core/command_processor.h"
+#include "video_core/hwrasterizer_base.h"
+#include "video_core/renderer_base.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+
namespace GPU {
Regs g_regs;
@@ -53,6 +61,29 @@ inline void Read(T &var, const u32 raw_addr) {
var = g_regs[addr / 4];
}
+/**
+ * Decodes a single source pixel into a four-component u8 color vector.
+ *
+ * @param input_format Pixel format that selects which Color::Decode* routine is used.
+ * @param src_pixel Pointer to the first byte of the encoded pixel.
+ * @return The decoded color, or {0, 0, 0, 0} if the format is not one of the handled cases
+ *         (an error is logged in that case).
+ */
+static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) {
+    switch (input_format) {
+    case Regs::PixelFormat::RGBA8:
+        return Color::DecodeRGBA8(src_pixel);
+
+    case Regs::PixelFormat::RGB8:
+        return Color::DecodeRGB8(src_pixel);
+
+    case Regs::PixelFormat::RGB565:
+        return Color::DecodeRGB565(src_pixel);
+
+    case Regs::PixelFormat::RGB5A1:
+        return Color::DecodeRGB5A1(src_pixel);
+
+    case Regs::PixelFormat::RGBA4:
+        return Color::DecodeRGBA4(src_pixel);
+
+    default:
+        // A scoped enum is not promoted to an integer when passed through a printf-style
+        // argument list, so convert it explicitly to match the "%x" conversion.
+        LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", static_cast<u32>(input_format));
+        return {0, 0, 0, 0};
+    }
+}
+
+
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
@@ -75,39 +106,43 @@ inline void Write(u32 addr, const T data) {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
- if (config.address_start && config.trigger) {
- u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
- u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
-
- if (config.fill_24bit) {
- // fill with 24-bit values
- for (u8* ptr = start; ptr < end; ptr += 3) {
- ptr[0] = config.value_24bit_r;
- ptr[1] = config.value_24bit_g;
- ptr[2] = config.value_24bit_b;
+ if (config.trigger) {
+ if (config.address_start) { // Some games pass invalid values here
+ u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
+ u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
+
+ if (config.fill_24bit) {
+ // fill with 24-bit values
+ for (u8* ptr = start; ptr < end; ptr += 3) {
+ ptr[0] = config.value_24bit_r;
+ ptr[1] = config.value_24bit_g;
+ ptr[2] = config.value_24bit_b;
+ }
+ } else if (config.fill_32bit) {
+ // fill with 32-bit values
+ for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
+ *ptr = config.value_32bit;
+ } else {
+ // fill with 16-bit values
+ for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
+ *ptr = config.value_16bit;
}
- } else if (config.fill_32bit) {
- // fill with 32-bit values
- for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
- *ptr = config.value_32bit;
- } else {
- // fill with 16-bit values
- for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
- *ptr = config.value_16bit;
- }
- LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+ LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
- config.trigger = 0;
- config.finished = 1;
+ if (!is_second_filler) {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
+ } else {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ }
- if (!is_second_filler) {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
- } else {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
}
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+ // Reset "trigger" flag and set the "finished" flag
+ // NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
+ config.trigger = 0;
+ config.finished = 1;
}
break;
}
@@ -116,6 +151,10 @@ inline void Write(u32 addr, const T data) {
{
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
+
+ if (Pica::g_debug_context)
+ Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
+
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
@@ -125,11 +164,18 @@ inline void Write(u32 addr, const T data) {
break;
}
- unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1;
- unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1;
+ if (config.output_tiled &&
+ (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+ LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
+ UNIMPLEMENTED();
+ break;
+ }
- u32 output_width = config.output_width / horizontal_scale;
- u32 output_height = config.output_height / vertical_scale;
+ bool horizontal_scale = config.scaling != config.NoScale;
+ bool vertical_scale = config.scaling == config.ScaleXY;
+
+ u32 output_width = config.output_width >> horizontal_scale;
+ u32 output_height = config.output_height >> vertical_scale;
u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
@@ -153,16 +199,14 @@ inline void Write(u32 addr, const T data) {
break;
}
- // TODO(Subv): Implement the box filter when scaling is enabled
- // right now we're just skipping the extra pixels.
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
- Math::Vec4<u8> src_color = { 0, 0, 0, 0 };
+ Math::Vec4<u8> src_color;
// Calculate the [x,y] position of the input image
// based on the current output position and the scale
- u32 input_x = x * horizontal_scale;
- u32 input_y = y * vertical_scale;
+ u32 input_x = x << horizontal_scale;
+ u32 input_y = y << vertical_scale;
if (config.flip_vertically) {
// Flip the y value of the output data,
@@ -177,46 +221,49 @@ inline void Write(u32 addr, const T data) {
u32 dst_offset;
if (config.output_tiled) {
- // Interpret the input as linear and the output as tiled
- u32 coarse_y = y & ~7;
- u32 stride = output_width * dst_bytes_per_pixel;
-
- src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
- dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+ if (!config.dont_swizzle) {
+ // Interpret the input as linear and the output as tiled
+ u32 coarse_y = y & ~7;
+ u32 stride = output_width * dst_bytes_per_pixel;
+
+ src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+ } else {
+ // Both input and output are linear
+ src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ }
} else {
- // Interpret the input as tiled and the output as linear
- u32 coarse_y = input_y & ~7;
- u32 stride = config.input_width * src_bytes_per_pixel;
-
- src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
- dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ if (!config.dont_swizzle) {
+ // Interpret the input as tiled and the output as linear
+ u32 coarse_y = input_y & ~7;
+ u32 stride = config.input_width * src_bytes_per_pixel;
+
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
+ dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ } else {
+ // Both input and output are tiled
+ u32 out_coarse_y = y & ~7;
+ u32 out_stride = output_width * dst_bytes_per_pixel;
+
+ u32 in_coarse_y = input_y & ~7;
+ u32 in_stride = config.input_width * src_bytes_per_pixel;
+
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
+ }
}
const u8* src_pixel = src_pointer + src_offset;
- switch (config.input_format) {
- case Regs::PixelFormat::RGBA8:
- src_color = Color::DecodeRGBA8(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB8:
- src_color = Color::DecodeRGB8(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB565:
- src_color = Color::DecodeRGB565(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB5A1:
- src_color = Color::DecodeRGB5A1(src_pixel);
- break;
-
- case Regs::PixelFormat::RGBA4:
- src_color = Color::DecodeRGBA4(src_pixel);
- break;
-
- default:
- LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value());
- break;
+ src_color = DecodePixel(config.input_format, src_pixel);
+ if (config.scaling == config.ScaleX) {
+ Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
+ src_color = ((src_color + pixel) / 2).Cast<u8>();
+ } else if (config.scaling == config.ScaleXY) {
+ Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+ src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
}
u8* dst_pixel = dst_pointer + dst_offset;
@@ -254,6 +301,7 @@ inline void Write(u32 addr, const T data) {
config.GetPhysicalOutputAddress(), output_width, output_height,
config.output_format.Value(), config.flags);
+ g_regs.display_transfer_config.trigger = 0;
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
@@ -268,7 +316,14 @@ inline void Write(u32 addr, const T data) {
if (config.trigger & 1)
{
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
+
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+ }
+
Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
+
+ g_regs.command_processor_config.trigger = 0;
}
break;
}
@@ -276,6 +331,13 @@ inline void Write(u32 addr, const T data) {
default:
break;
}
+
+ // Notify tracer about the register write
+ // This is happening *after* handling the write to make sure we properly catch all memory reads.
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ // addr + GPU VBase - IO VBase + IO PBase
+ Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+ }
}
// Explicitly instantiate template functions because we aren't defining this in the header: