diff options
Diffstat (limited to 'src/core/hw')
-rw-r--r-- | src/core/hw/gpu.cpp | 151 | ||||
-rw-r--r-- | src/core/hw/gpu.h | 85 | ||||
-rw-r--r-- | src/core/hw/hw.cpp | 17 | ||||
-rw-r--r-- | src/core/hw/hw.h | 42 | ||||
-rw-r--r-- | src/core/hw/lcd.cpp | 16 | ||||
-rw-r--r-- | src/core/hw/lcd.h | 20 | ||||
-rw-r--r-- | src/core/hw/y2r.cpp | 65 | ||||
-rw-r--r-- | src/core/hw/y2r.h | 3 |
8 files changed, 211 insertions, 188 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index a4dfb7e43..0e6b91e3a 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -5,34 +5,26 @@ #include <cstring> #include <numeric> #include <type_traits> - #include "common/color.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "common/vector_math.h" - -#include "core/settings.h" -#include "core/memory.h" #include "core/core_timing.h" - #include "core/hle/service/gsp_gpu.h" #include "core/hle/service/hid/hid.h" - -#include "core/hw/hw.h" #include "core/hw/gpu.h" - +#include "core/hw/hw.h" +#include "core/memory.h" +#include "core/settings.h" #include "core/tracer/recorder.h" - #include "video_core/command_processor.h" +#include "video_core/debug_utils/debug_utils.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" #include "video_core/utils.h" #include "video_core/video_core.h" -#include "video_core/debug_utils/debug_utils.h" - - namespace GPU { Regs g_regs; @@ -49,7 +41,7 @@ static u64 frame_count; static bool last_skip_frame; template <typename T> -inline void Read(T &var, const u32 raw_addr) { +inline void Read(T& var, const u32 raw_addr) { u32 addr = raw_addr - HW::VADDR_GPU; u32 index = addr / 4; @@ -105,8 +97,7 @@ inline void Write(u32 addr, const T data) { // Memory fills are triggered once the fill value is written. case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): - case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): - { + case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): { const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); auto& config = g_regs.memory_fill_config[is_second_filler]; @@ -125,7 +116,9 @@ inline void Write(u32 addr, const T data) { // regions that were between surfaces or within the touching // ones for cpu to manually fill here. if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { - Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); + Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), + config.GetEndAddress() - + config.GetStartAddress()); if (config.fill_24bit) { // fill with 24-bit values @@ -150,7 +143,8 @@ inline void Write(u32 addr, const T data) { } } - LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); + LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), + config.GetEndAddress()); if (!is_second_filler) { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); @@ -167,15 +161,15 @@ inline void Write(u32 addr, const T data) { break; } - case GPU_REG_INDEX(display_transfer_config.trigger): - { + case GPU_REG_INDEX(display_transfer_config.trigger): { MICROPROFILE_SCOPE(GPU_DisplayTransfer); const auto& config = g_regs.display_transfer_config; if (config.trigger & 1) { if (Pica::g_debug_context) - Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); + Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, + nullptr); if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); @@ -187,17 +181,23 @@ inline void Write(u32 addr, const T data) { u32 output_width = config.texture_copy.output_width * 16; u32 output_gap = config.texture_copy.output_gap * 16; - size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); - Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size)); + size_t contiguous_input_size = + config.texture_copy.size / input_width * (input_width + input_gap); + Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), + static_cast<u32>(contiguous_input_size)); - size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); + size_t contiguous_output_size = + config.texture_copy.size / output_width * (output_width + output_gap); + Memory::RasterizerFlushAndInvalidateRegion( + config.GetPhysicalOutputAddress(), + static_cast<u32>(contiguous_output_size)); u32 remaining_size = config.texture_copy.size; u32 remaining_input = input_width; u32 remaining_output = output_width; while (remaining_size > 0) { - u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); + u32 copy_size = + std::min({remaining_input, remaining_output, remaining_size}); std::memcpy(dst_pointer, src_pointer, copy_size); src_pointer += copy_size; @@ -217,10 +217,11 @@ inline void Write(u32 addr, const T data) { } } - LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", - config.texture_copy.size, - config.GetPhysicalInputAddress(), input_width, input_gap, - config.GetPhysicalOutputAddress(), output_width, output_gap, + LOG_TRACE( + HW_GPU, + "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", + config.texture_copy.size, config.GetPhysicalInputAddress(), input_width, + input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap, config.flags); GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); @@ -228,7 +229,8 @@ inline void Write(u32 addr, const T data) { } if (config.scaling > config.ScaleXY) { - LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); + LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", + config.scaling.Value()); UNIMPLEMENTED(); break; } @@ -245,11 +247,14 @@ inline void Write(u32 addr, const T data) { u32 output_width = config.output_width >> horizontal_scale; u32 output_height = config.output_height >> vertical_scale; - u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); - u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); + u32 input_size = config.input_width * config.input_height * + GPU::Regs::BytesPerPixel(config.input_format); + u32 output_size = + output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); + Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), + output_size); for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { @@ -278,11 +283,14 @@ inline void Write(u32 addr, const T data) { u32 coarse_y = y & ~7; u32 stride = output_width * dst_bytes_per_pixel; - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; - dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; + src_offset = + (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + + coarse_y * stride; } else { // Both input and output are linear - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + src_offset = + (input_x + input_y * config.input_width) * src_bytes_per_pixel; dst_offset = (x + y * output_width) * dst_bytes_per_pixel; } } else { @@ -291,7 +299,9 @@ inline void Write(u32 addr, const T data) { u32 coarse_y = input_y & ~7; u32 stride = config.input_width * src_bytes_per_pixel; - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; + src_offset = VideoCore::GetMortonOffset(input_x, input_y, + src_bytes_per_pixel) + + coarse_y * stride; dst_offset = (x + y * output_width) * dst_bytes_per_pixel; } else { // Both input and output are tiled @@ -301,20 +311,27 @@ inline void Write(u32 addr, const T data) { u32 in_coarse_y = input_y & ~7; u32 in_stride = config.input_width * src_bytes_per_pixel; - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; - dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; + src_offset = VideoCore::GetMortonOffset(input_x, input_y, + src_bytes_per_pixel) + + in_coarse_y * in_stride; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + + out_coarse_y * out_stride; } } const u8* src_pixel = src_pointer + src_offset; src_color = DecodePixel(config.input_format, src_pixel); if (config.scaling == config.ScaleX) { - Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); + Math::Vec4<u8> pixel = + DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); src_color = ((src_color + pixel) / 2).Cast<u8>(); } else if (config.scaling == config.ScaleXY) { - Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); - Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); - Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); + Math::Vec4<u8> pixel1 = DecodePixel( + config.input_format, src_pixel + 1 * src_bytes_per_pixel); + Math::Vec4<u8> pixel2 = DecodePixel( + config.input_format, src_pixel + 2 * src_bytes_per_pixel); + Math::Vec4<u8> pixel3 = DecodePixel( + config.input_format, src_pixel + 3 * src_bytes_per_pixel); src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); } @@ -341,17 +358,20 @@ inline void Write(u32 addr, const T data) { break; default: - LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); + LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", + config.output_format.Value()); break; } } } - LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", - config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), - config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), - config.GetPhysicalOutputAddress(), output_width, output_height, - config.output_format.Value(), config.flags); + LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> " + "0x%08x(%ux%u), dst format %x, flags 0x%08X", + config.output_height * output_width * + GPU::Regs::BytesPerPixel(config.output_format), + config.GetPhysicalInputAddress(), config.input_width.Value(), + config.input_height.Value(), config.GetPhysicalOutputAddress(), + output_width, output_height, config.output_format.Value(), config.flags); } g_regs.display_transfer_config.trigger = 0; @@ -361,17 +381,16 @@ inline void Write(u32 addr, const T data) { } // Seems like writing to this register triggers processing - case GPU_REG_INDEX(command_processor_config.trigger): - { + case GPU_REG_INDEX(command_processor_config.trigger): { const auto& config = g_regs.command_processor_config; - if (config.trigger & 1) - { + if (config.trigger & 1) { MICROPROFILE_SCOPE(GPU_CmdlistProcessing); u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); if (Pica::g_debug_context && Pica::g_debug_context->recorder) { - Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); + Pica::g_debug_context->recorder->MemoryAccessed( + (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); } Pica::CommandProcessor::ProcessCommandList(buffer, config.size); @@ -389,16 +408,17 @@ inline void Write(u32 addr, const T data) { // This is happening *after* handling the write to make sure we properly catch all memory reads. if (Pica::g_debug_context && Pica::g_debug_context->recorder) { // addr + GPU VBase - IO VBase + IO PBase - Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); + Pica::g_debug_context->recorder->RegisterWritten<T>( + addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); } } // Explicitly instantiate template functions because we aren't defining this in the header: -template void Read<u64>(u64 &var, const u32 addr); -template void Read<u32>(u32 &var, const u32 addr); -template void Read<u16>(u16 &var, const u32 addr); -template void Read<u8>(u8 &var, const u32 addr); +template void Read<u64>(u64& var, const u32 addr); +template void Read<u32>(u32& var, const u32 addr); +template void Read<u16>(u16& var, const u32 addr); +template void Read<u8>(u8& var, const u32 addr); template void Write<u64>(u32 addr, const u64 data); template void Write<u32>(u32 addr, const u32 data); @@ -417,8 +437,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) { // - If frameskip == 0 (disabled), always swap buffers // - If frameskip == 1, swap buffers every other frame (starting from the first frame) // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) - if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || - Settings::values.frame_skip == 0) { + if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && + last_skip_frame != g_skip_frame) || + Settings::values.frame_skip == 0) { VideoCore::g_renderer->SwapBuffers(); } @@ -448,12 +469,12 @@ void Init() { // .. or at least these are the ones used by system applets. // There's probably a smarter way to come up with addresses // like this which does not require hardcoding. - framebuffer_top.address_left1 = 0x181E6000; - framebuffer_top.address_left2 = 0x1822C800; + framebuffer_top.address_left1 = 0x181E6000; + framebuffer_top.address_left2 = 0x1822C800; framebuffer_top.address_right1 = 0x18273000; framebuffer_top.address_right2 = 0x182B9800; - framebuffer_sub.address_left1 = 0x1848F000; - framebuffer_sub.address_left2 = 0x184C7800; + framebuffer_sub.address_left1 = 0x1848F000; + framebuffer_sub.address_left2 = 0x184C7800; framebuffer_top.width.Assign(240); framebuffer_top.height.Assign(400); diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index da4c345b4..32ddc5697 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -6,7 +6,6 @@ #include <cstddef> #include <type_traits> - #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" @@ -17,7 +16,8 @@ namespace GPU { // Returns index corresponding to the Regs member labeled by field_name // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions // when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). -// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members +// For details cf. +// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members // Hopefully, this will be fixed sometime in the future. // For lack of better alternatives, we currently hardcode the offsets when constant // expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts @@ -30,8 +30,9 @@ namespace GPU { // really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX // and then performs a (no-op) cast to size_t iff the second argument matches the expected // field offset. Otherwise, the compiler will fail to compile this code. -#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ - ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) +#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ + ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \ + GPU_REG_INDEX(field_name)) #endif // MMIO region 0x1EFxxxxx @@ -44,18 +45,18 @@ struct Regs { // support for that. #define ASSERT_MEMBER_SIZE(name, size_in_bytes) #else -#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ - static_assert(sizeof(name) == size_in_bytes, \ +#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ + static_assert(sizeof(name) == size_in_bytes, \ "Structure size and register block length don't match") #endif // Components are laid out in reverse byte order, most significant bits first. enum class PixelFormat : u32 { - RGBA8 = 0, - RGB8 = 1, + RGBA8 = 0, + RGB8 = 1, RGB565 = 2, RGB5A1 = 3, - RGBA4 = 4, + RGBA4 = 4, }; /** @@ -88,8 +89,8 @@ struct Regs { BitField<0, 16, u32> value_16bit; // TODO: Verify component order - BitField< 0, 8, u32> value_24bit_r; - BitField< 8, 8, u32> value_24bit_g; + BitField<0, 8, u32> value_24bit_r; + BitField<8, 8, u32> value_24bit_g; BitField<16, 8, u32> value_24bit_b; }; @@ -126,7 +127,7 @@ struct Regs { union { u32 size; - BitField< 0, 16, u32> width; + BitField<0, 16, u32> width; BitField<16, 16, u32> height; }; @@ -138,7 +139,7 @@ struct Regs { union { u32 format; - BitField< 0, 3, PixelFormat> color_format; + BitField<0, 3, PixelFormat> color_format; }; INSERT_PADDING_WORDS(0x1); @@ -180,35 +181,37 @@ struct Regs { union { u32 output_size; - BitField< 0, 16, u32> output_width; + BitField<0, 16, u32> output_width; BitField<16, 16, u32> output_height; }; union { u32 input_size; - BitField< 0, 16, u32> input_width; + BitField<0, 16, u32> input_width; BitField<16, 16, u32> input_height; }; enum ScalingMode : u32 { - NoScale = 0, // Doesn't scale the image - ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter - ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter + NoScale = 0, // Doesn't scale the image + ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter + ScaleXY = + 2, // Downscales the image in half in both the X and Y axes and applies a box filter }; union { u32 flags; - BitField< 0, 1, u32> flip_vertically; // flips input data vertically - BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format - BitField< 2, 1, u32> crop_input_lines; - BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields - BitField< 5, 1, u32> dont_swizzle; - BitField< 8, 3, PixelFormat> input_format; + BitField<0, 1, u32> flip_vertically; // flips input data vertically + BitField<1, 1, u32> input_linear; // Converts from linear to tiled format + BitField<2, 1, u32> crop_input_lines; + BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any + // processing and respecting texture copy fields + BitField<5, 1, u32> dont_swizzle; + BitField<8, 3, PixelFormat> input_format; BitField<12, 3, PixelFormat> output_format; /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. - BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented + BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer }; @@ -225,14 +228,14 @@ struct Regs { union { u32 input_size; - BitField< 0, 16, u32> input_width; + BitField<0, 16, u32> input_width; BitField<16, 16, u32> input_gap; }; union { u32 output_size; - BitField< 0, 16, u32> output_width; + BitField<0, 16, u32> output_width; BitField<16, 16, u32> output_gap; }; } texture_copy; @@ -267,12 +270,12 @@ struct Regs { return sizeof(Regs) / sizeof(u32); } - const u32& operator [] (int index) const { + const u32& operator[](int index) const { const u32* content = reinterpret_cast<const u32*>(this); return content[index]; } - u32& operator [] (int index) { + u32& operator[](int index) { u32* content = reinterpret_cast<u32*>(this); return content[index]; } @@ -294,28 +297,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan // is technically allowed since C++11. This macro should be enabled once MSVC adds // support for that. #ifndef _MSC_VER -#define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ - "Field "#field_name" has invalid position") - -ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); -ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); -ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); -ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); -ASSERT_REG_POSITION(display_transfer_config, 0x00300); +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); +ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); +ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); +ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); +ASSERT_REG_POSITION(display_transfer_config, 0x00300); ASSERT_REG_POSITION(command_processor_config, 0x00638); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) -// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. +// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value +// anyway. static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); extern Regs g_regs; extern bool g_skip_frame; template <typename T> -void Read(T &var, const u32 addr); +void Read(T& var, const u32 addr); template <typename T> void Write(u32 addr, const T data); @@ -326,5 +330,4 @@ void Init(); /// Shutdown hardware void Shutdown(); - } // namespace diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index b5fdbf9c1..9ff8825b2 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp @@ -4,15 +4,14 @@ #include "common/common_types.h" #include "common/logging/log.h" - -#include "core/hw/hw.h" #include "core/hw/gpu.h" +#include "core/hw/hw.h" #include "core/hw/lcd.h" namespace HW { template <typename T> -inline void Read(T &var, const u32 addr) { +inline void Read(T& var, const u32 addr) { switch (addr & 0xFFFFF000) { case VADDR_GPU: case VADDR_GPU + 0x1000: @@ -71,10 +70,10 @@ inline void Write(u32 addr, const T data) { // Explicitly instantiate template functions because we aren't defining this in the header: -template void Read<u64>(u64 &var, const u32 addr); -template void Read<u32>(u32 &var, const u32 addr); -template void Read<u16>(u16 &var, const u32 addr); -template void Read<u8>(u8 &var, const u32 addr); +template void Read<u64>(u64& var, const u32 addr); +template void Read<u32>(u32& var, const u32 addr); +template void Read<u16>(u16& var, const u32 addr); +template void Read<u8>(u8& var, const u32 addr); template void Write<u64>(u32 addr, const u64 data); template void Write<u32>(u32 addr, const u32 data); @@ -82,8 +81,7 @@ template void Write<u16>(u32 addr, const u16 data); template void Write<u8>(u32 addr, const u8 data); /// Update hardware -void Update() { -} +void Update() {} /// Initialize hardware void Init() { @@ -98,5 +96,4 @@ void Shutdown() { LCD::Shutdown(); LOG_DEBUG(HW, "shutdown OK"); } - } diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h index d65608910..a3c5d2ea3 100644 --- a/src/core/hw/hw.h +++ b/src/core/hw/hw.h @@ -10,30 +10,30 @@ namespace HW { /// Beginnings of IO register regions, in the user VA space. enum : u32 { - VADDR_HASH = 0x1EC01000, - VADDR_CSND = 0x1EC03000, - VADDR_DSP = 0x1EC40000, - VADDR_PDN = 0x1EC41000, - VADDR_CODEC = 0x1EC41000, - VADDR_SPI = 0x1EC42000, - VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? - VADDR_I2C = 0x1EC44000, - VADDR_CODEC_2 = 0x1EC45000, - VADDR_HID = 0x1EC46000, - VADDR_GPIO = 0x1EC47000, - VADDR_I2C_2 = 0x1EC48000, - VADDR_SPI_3 = 0x1EC60000, - VADDR_I2C_3 = 0x1EC61000, - VADDR_MIC = 0x1EC62000, - VADDR_PXI = 0x1EC63000, - VADDR_LCD = 0x1ED02000, - VADDR_DSP_2 = 0x1ED03000, - VADDR_HASH_2 = 0x1EE01000, - VADDR_GPU = 0x1EF00000, + VADDR_HASH = 0x1EC01000, + VADDR_CSND = 0x1EC03000, + VADDR_DSP = 0x1EC40000, + VADDR_PDN = 0x1EC41000, + VADDR_CODEC = 0x1EC41000, + VADDR_SPI = 0x1EC42000, + VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? + VADDR_I2C = 0x1EC44000, + VADDR_CODEC_2 = 0x1EC45000, + VADDR_HID = 0x1EC46000, + VADDR_GPIO = 0x1EC47000, + VADDR_I2C_2 = 0x1EC48000, + VADDR_SPI_3 = 0x1EC60000, + VADDR_I2C_3 = 0x1EC61000, + VADDR_MIC = 0x1EC62000, + VADDR_PXI = 0x1EC63000, + VADDR_LCD = 0x1ED02000, + VADDR_DSP_2 = 0x1ED03000, + VADDR_HASH_2 = 0x1EE01000, + VADDR_GPU = 0x1EF00000, }; template <typename T> -void Read(T &var, const u32 addr); +void Read(T& var, const u32 addr); template <typename T> void Write(u32 addr, const T data); diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp index 6f93709e3..2aa89de18 100644 --- a/src/core/hw/lcd.cpp +++ b/src/core/hw/lcd.cpp @@ -3,13 +3,10 @@ // Refer to the license.txt file included. #include <cstring> - #include "common/common_types.h" #include "common/logging/log.h" - #include "core/hw/hw.h" #include "core/hw/lcd.h" - #include "core/tracer/recorder.h" #include "video_core/debug_utils/debug_utils.h" @@ -18,7 +15,7 @@ namespace LCD { Regs g_regs; template <typename T> -inline void Read(T &var, const u32 raw_addr) { +inline void Read(T& var, const u32 raw_addr) { u32 addr = raw_addr - HW::VADDR_LCD; u32 index = addr / 4; @@ -48,16 +45,17 @@ inline void Write(u32 addr, const T data) { // This is happening *after* handling the write to make sure we properly catch all memory reads. if (Pica::g_debug_context && Pica::g_debug_context->recorder) { // addr + GPU VBase - IO VBase + IO PBase - Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); + Pica::g_debug_context->recorder->RegisterWritten<T>( + addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); } } // Explicitly instantiate template functions because we aren't defining this in the header: -template void Read<u64>(u64 &var, const u32 addr); -template void Read<u32>(u32 &var, const u32 addr); -template void Read<u16>(u16 &var, const u32 addr); -template void Read<u8>(u8 &var, const u32 addr); +template void Read<u64>(u64& var, const u32 addr); +template void Read<u32>(u32& var, const u32 addr); +template void Read<u16>(u16& var, const u32 addr); +template void Read<u8>(u8& var, const u32 addr); template void Write<u64>(u32 addr, const u64 data); template void Write<u32>(u32 addr, const u32 data); diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h index 57029c5e8..191fd44af 100644 --- a/src/core/hw/lcd.h +++ b/src/core/hw/lcd.h @@ -6,7 +6,6 @@ #include <cstddef> #include <type_traits> - #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -42,16 +41,15 @@ struct Regs { return sizeof(Regs) / sizeof(u32); } - const u32& operator [] (int index) const { + const u32& operator[](int index) const { const u32* content = reinterpret_cast<const u32*>(this); return content[index]; } - u32& operator [] (int index) { + u32& operator[](int index) { u32* content = reinterpret_cast<u32*>(this); return content[index]; } - }; static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); @@ -59,14 +57,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan // is technically allowed since C++11. This macro should be enabled once MSVC adds // support for that. #ifndef _MSC_VER -#define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ - "Field "#field_name" has invalid position") +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(color_fill_top, 0x81); -ASSERT_REG_POSITION(backlight_top, 0x90); +ASSERT_REG_POSITION(color_fill_top, 0x81); +ASSERT_REG_POSITION(backlight_top, 0x90); ASSERT_REG_POSITION(color_fill_bottom, 0x281); -ASSERT_REG_POSITION(backlight_bottom, 0x290); +ASSERT_REG_POSITION(backlight_bottom, 0x290); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) @@ -74,7 +72,7 @@ ASSERT_REG_POSITION(backlight_bottom, 0x290); extern Regs g_regs; template <typename T> -void Read(T &var, const u32 addr); +void Read(T& var, const u32 addr); template <typename T> void Write(u32 addr, const T data); diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 083391e83..6a6c707a2 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp @@ -6,13 +6,11 @@ #include <array> #include <cstddef> #include <memory> - #include "common/assert.h" #include "common/color.h" #include "common/common_types.h" #include "common/math_util.h" #include "common/vector_math.h" - #include "core/hle/service/y2r_u.h" #include "core/hw/y2r.h" #include "core/memory.h" @@ -27,9 +25,9 @@ static const size_t TILE_SIZE = 8 * 8; using ImageTile = std::array<u32, TILE_SIZE>; /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. -static void ConvertYUVToRGB(InputFormat input_format, - const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], - unsigned int width, unsigned int height, const CoefficientSet& coefficients) { +static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U, + const u8* input_V, ImageTile output[], unsigned int width, + unsigned int height, const CoefficientSet& coefficients) { for (unsigned int y = 0; y < height; ++y) { for (unsigned int x = 0; x < width; ++x) { @@ -58,11 +56,11 @@ static void ConvertYUVToRGB(InputFormat input_format, // This conversion process is bit-exact with hardware, as far as could be tested. auto& c = coefficients; - s32 cY = c[0]*Y; + s32 cY = c[0] * Y; - s32 r = cY + c[1]*V; - s32 g = cY - c[3]*U - c[2]*V; - s32 b = cY + c[4]*U; + s32 r = cY + c[1] * V; + s32 g = cY - c[2] * V - c[3] * U; + s32 b = cY + c[4] * U; const s32 rounding_offset = 0x18; r = (r >> 3) + c[5] + rounding_offset; @@ -74,14 +72,14 @@ static void ConvertYUVToRGB(InputFormat input_format, u32* out = &output[tile][y * 8 + tile_x]; using MathUtil::Clamp; - *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | - ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | + *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); } } } -/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. +/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit +/// formats to 8-bit. template <size_t N> static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { const u8* input = Memory::GetPointer(buf.address); @@ -103,9 +101,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data } } -/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. +/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA +/// transfer. static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, - OutputFormat output_format, u8 alpha) { + OutputFormat output_format, u8 alpha) { u8* output = Memory::GetPointer(buf.address); @@ -113,9 +112,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data u8* unit_end = output + buf.transfer_unit; while (output < unit_end) { u32 color = *input++; - Math::Vec4<u8> col_vec{ - (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha - }; + Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha}; switch (output_format) { case OutputFormat::RGBA8: @@ -145,7 +142,8 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data } } -static const u8 linear_lut[64] = { +static const u8 linear_lut[TILE_SIZE] = { + // clang-format off 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -154,9 +152,11 @@ static const u8 linear_lut[64] = { 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + // clang-format on }; -static const u8 morton_lut[64] = { +static const u8 morton_lut[TILE_SIZE] = { + // clang-format off 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, 8, 9, 12, 13, 24, 25, 28, 29, @@ -165,15 +165,18 @@ static const u8 morton_lut[64] = { 34, 35, 38, 39, 50, 51, 54, 55, 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63, + // clang-format on }; -static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { +static void RotateTile0(const ImageTile& input, ImageTile& output, int height, + const u8 out_map[64]) { for (int i = 0; i < height * 8; ++i) { output[out_map[i]] = input[i]; } } -static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { +static void RotateTile90(const ImageTile& input, ImageTile& output, int height, + const u8 out_map[64]) { int out_i = 0; for (int x = 0; x < 8; ++x) { for (int y = height - 1; y >= 0; --y) { @@ -182,16 +185,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height, } } -static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { +static void RotateTile180(const ImageTile& input, ImageTile& output, int height, + const u8 out_map[64]) { int out_i = 0; for (int i = height * 8 - 1; i >= 0; --i) { output[out_map[out_i++]] = input[i]; } } -static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { +static void RotateTile270(const ImageTile& input, ImageTile& output, int height, + const u8 out_map[64]) { int out_i = 0; - for (int x = 8-1; x >= 0; --x) { + for (int x = 8 - 1; x >= 0; --x) { for (int y = 0; y < height; ++y) { output[out_map[out_i++]] = input[y * 8 + x]; } @@ -274,9 +279,11 @@ void PerformConversion(ConversionConfiguration& cvt) { const u8* tile_remap = nullptr; switch (cvt.block_alignment) { case BlockAlignment::Linear: - tile_remap = linear_lut; break; + tile_remap = linear_lut; + break; case BlockAlignment::Block8x8: - tile_remap = morton_lut; break; + tile_remap = morton_lut; + break; } for (unsigned int y = 0; y < cvt.input_lines; y += 8) { @@ -320,7 +327,7 @@ void PerformConversion(ConversionConfiguration& cvt) { // Note(yuriks): If additional optimization is required, input_format can be moved to a // template parameter, so that its dispatch can be moved to outside the inner loop. ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), - cvt.input_line_width, row_height, cvt.coefficients); + cvt.input_line_width, row_height, cvt.coefficients); u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); @@ -367,9 +374,9 @@ void PerformConversion(ConversionConfiguration& cvt) { // Note(yuriks): If additional optimization is required, output_format can be moved to a // template parameter, so that its dispatch can be moved to outside the inner loop. - SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); + SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, + cvt.output_format, (u8)cvt.alpha); } } - } } diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h index 729e1eee3..6b6e71bec 100644 --- a/src/core/hw/y2r.h +++ b/src/core/hw/y2r.h @@ -3,13 +3,12 @@ // Refer to the license.txt file included. namespace Y2R_U { - struct ConversionConfiguration; +struct ConversionConfiguration; } namespace HW { namespace Y2R { void PerformConversion(Y2R_U::ConversionConfiguration& cvt); - } } |