8 files changed, 211 insertions, 188 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..0e6b91e3a 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,34 +5,26 @@
 #include <cstring>
 #include <numeric>
 #include <type_traits>
-
 #include "common/color.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "common/vector_math.h"
-
-#include "core/settings.h"
-#include "core/memory.h"
 #include "core/core_timing.h"
-
 #include "core/hle/service/gsp_gpu.h"
 #include "core/hle/service/hid/hid.h"
-
-#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
-
+#include "core/hw/hw.h"
+#include "core/memory.h"
+#include "core/settings.h"
 #include "core/tracer/recorder.h"
-
 #include "video_core/command_processor.h"
+#include "video_core/debug_utils/debug_utils.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 #include "video_core/utils.h"
 #include "video_core/video_core.h"
 
-#include "video_core/debug_utils/debug_utils.h"
-
-
 namespace GPU {
 
 Regs g_regs;
@@ -49,7 +41,7 @@ static u64 frame_count;
 static bool last_skip_frame;
 
 template <typename T>
-inline void Read(T &var, const u32 raw_addr) {
+inline void Read(T& var, const u32 raw_addr) {
     u32 addr = raw_addr - HW::VADDR_GPU;
     u32 index = addr / 4;
 
@@ -105,8 +97,7 @@ inline void Write(u32 addr, const T data) {
 
     // Memory fills are triggered once the fill value is written.
     case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
-    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3):
-    {
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
         const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
         auto& config = g_regs.memory_fill_config[is_second_filler];
 
@@ -125,7 +116,9 @@ inline void Write(u32 addr, const T data) {
                 //       regions that were between surfaces or within the touching
                 //       ones for cpu to manually fill here.
                 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
-                    Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+                    Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
+                                                               config.GetEndAddress() -
+                                                                   config.GetStartAddress());
 
                     if (config.fill_24bit) {
                         // fill with 24-bit values
@@ -150,7 +143,8 @@ inline void Write(u32 addr, const T data) {
                     }
                 }
 
-                LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+                LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
+                          config.GetEndAddress());
 
                 if (!is_second_filler) {
                     GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +161,15 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case GPU_REG_INDEX(display_transfer_config.trigger):
-    {
+    case GPU_REG_INDEX(display_transfer_config.trigger): {
         MICROPROFILE_SCOPE(GPU_DisplayTransfer);
 
         const auto& config = g_regs.display_transfer_config;
         if (config.trigger & 1) {
 
             if (Pica::g_debug_context)
-                Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
+                Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
+                                               nullptr);
 
             if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
                 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +181,23 @@ inline void Write(u32 addr, const T data) {
                     u32 output_width = config.texture_copy.output_width * 16;
                     u32 output_gap = config.texture_copy.output_gap * 16;
 
-                    size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
-                    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
+                    size_t contiguous_input_size =
+                        config.texture_copy.size / input_width * (input_width + input_gap);
+                    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
+                                                  static_cast<u32>(contiguous_input_size));
 
-                    size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
-                    Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
+                    size_t contiguous_output_size =
+                        config.texture_copy.size / output_width * (output_width + output_gap);
+                    Memory::RasterizerFlushAndInvalidateRegion(
+                        config.GetPhysicalOutputAddress(),
+                        static_cast<u32>(contiguous_output_size));
 
                     u32 remaining_size = config.texture_copy.size;
                     u32 remaining_input = input_width;
                     u32 remaining_output = output_width;
                     while (remaining_size > 0) {
-                        u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+                        u32 copy_size =
+                            std::min({remaining_input, remaining_output, remaining_size});
 
                         std::memcpy(dst_pointer, src_pointer, copy_size);
                         src_pointer += copy_size;
@@ -217,10 +217,11 @@ inline void Write(u32 addr, const T data) {
                         }
                     }
 
-                    LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
-                        config.texture_copy.size,
-                        config.GetPhysicalInputAddress(), input_width, input_gap,
-                        config.GetPhysicalOutputAddress(), output_width, output_gap,
+                    LOG_TRACE(
+                        HW_GPU,
+                        "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+                        config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
+                        input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
                         config.flags);
 
                     GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +229,8 @@ inline void Write(u32 addr, const T data) {
                 }
 
                 if (config.scaling > config.ScaleXY) {
-                    LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
+                    LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
+                                 config.scaling.Value());
                     UNIMPLEMENTED();
                     break;
                 }
@@ -245,11 +247,14 @@ inline void Write(u32 addr, const T data) {
                 u32 output_width = config.output_width >> horizontal_scale;
                 u32 output_height = config.output_height >> vertical_scale;
 
-                u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
-                u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
+                u32 input_size = config.input_width * config.input_height *
+                                 GPU::Regs::BytesPerPixel(config.input_format);
+                u32 output_size =
+                    output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
 
                 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
-                Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
+                Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
+                                                           output_size);
 
                 for (u32 y = 0; y < output_height; ++y) {
                     for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +283,14 @@ inline void Write(u32 addr, const T data) {
                                 u32 coarse_y = y & ~7;
                                 u32 stride = output_width * dst_bytes_per_pixel;
 
-                                src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
-                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+                                src_offset =
+                                    (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+                                             coarse_y * stride;
                             } else {
                                 // Both input and output are linear
-                                src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+                                src_offset =
+                                    (input_x + input_y * config.input_width) * src_bytes_per_pixel;
                                 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
                             }
                         } else {
@@ -291,7 +299,9 @@ inline void Write(u32 addr, const T data) {
                                 u32 coarse_y = input_y & ~7;
                                 u32 stride = config.input_width * src_bytes_per_pixel;
 
-                                src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
+                                src_offset = VideoCore::GetMortonOffset(input_x, input_y,
+                                                                        src_bytes_per_pixel) +
+                                             coarse_y * stride;
                                 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
                             } else {
                                 // Both input and output are tiled
@@ -301,20 +311,27 @@ inline void Write(u32 addr, const T data) {
                                 u32 in_coarse_y = input_y & ~7;
                                 u32 in_stride = config.input_width * src_bytes_per_pixel;
 
-                                src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
-                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
+                                src_offset = VideoCore::GetMortonOffset(input_x, input_y,
+                                                                        src_bytes_per_pixel) +
+                                             in_coarse_y * in_stride;
+                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+                                             out_coarse_y * out_stride;
                             }
                         }
 
                         const u8* src_pixel = src_pointer + src_offset;
                         src_color = DecodePixel(config.input_format, src_pixel);
                         if (config.scaling == config.ScaleX) {
-                            Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel =
+                                DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
                             src_color = ((src_color + pixel) / 2).Cast<u8>();
                         } else if (config.scaling == config.ScaleXY) {
-                            Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
-                            Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
-                            Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel1 = DecodePixel(
+                                config.input_format, src_pixel + 1 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel2 = DecodePixel(
+                                config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel3 = DecodePixel(
+                                config.input_format, src_pixel + 3 * src_bytes_per_pixel);
                             src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
                         }
 
@@ -341,17 +358,20 @@ inline void Write(u32 addr, const T data) {
                             break;
 
                         default:
-                            LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
+                            LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
+                                      config.output_format.Value());
                             break;
                         }
                     }
                 }
 
-                LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
-                      config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
-                      config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
-                      config.GetPhysicalOutputAddress(), output_width, output_height,
-                      config.output_format.Value(), config.flags);
+                LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
+                                  "0x%08x(%ux%u), dst format %x, flags 0x%08X",
+                          config.output_height * output_width *
+                              GPU::Regs::BytesPerPixel(config.output_format),
+                          config.GetPhysicalInputAddress(), config.input_width.Value(),
+                          config.input_height.Value(), config.GetPhysicalOutputAddress(),
+                          output_width, output_height, config.output_format.Value(), config.flags);
             }
 
             g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +381,16 @@ inline void Write(u32 addr, const T data) {
     }
 
     // Seems like writing to this register triggers processing
-    case GPU_REG_INDEX(command_processor_config.trigger):
-    {
+    case GPU_REG_INDEX(command_processor_config.trigger): {
         const auto& config = g_regs.command_processor_config;
-        if (config.trigger & 1)
-        {
+        if (config.trigger & 1) {
             MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
 
             u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
 
             if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
-                Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+                Pica::g_debug_context->recorder->MemoryAccessed(
+                    (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
             }
 
             Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +408,17 @@ inline void Write(u32 addr, const T data) {
     // This is happening *after* handling the write to make sure we properly catch all memory reads.
     if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
         // addr + GPU VBase - IO VBase + IO PBase
-        Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+        Pica::g_debug_context->recorder->RegisterWritten<T>(
+            addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
     }
 }
 
 // Explicitly instantiate template functions because we aren't defining this in the header:
 
-template void Read<u64>(u64 &var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +437,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
     //  - If frameskip == 0 (disabled), always swap buffers
     //  - If frameskip == 1, swap buffers every other frame (starting from the first frame)
     //  - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
-    if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) ||
-            Settings::values.frame_skip == 0) {
+    if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
+         last_skip_frame != g_skip_frame) ||
+        Settings::values.frame_skip == 0) {
         VideoCore::g_renderer->SwapBuffers();
     }
 
@@ -448,12 +469,12 @@ void Init() {
     // .. or at least these are the ones used by system applets.
     // There's probably a smarter way to come up with addresses
     // like this which does not require hardcoding.
-    framebuffer_top.address_left1  = 0x181E6000;
-    framebuffer_top.address_left2  = 0x1822C800;
+    framebuffer_top.address_left1 = 0x181E6000;
+    framebuffer_top.address_left2 = 0x1822C800;
     framebuffer_top.address_right1 = 0x18273000;
     framebuffer_top.address_right2 = 0x182B9800;
-    framebuffer_sub.address_left1  = 0x1848F000;
-    framebuffer_sub.address_left2  = 0x184C7800;
+    framebuffer_sub.address_left1 = 0x1848F000;
+    framebuffer_sub.address_left2 = 0x184C7800;
 
     framebuffer_top.width.Assign(240);
     framebuffer_top.height.Assign(400);
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index da4c345b4..32ddc5697 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -6,7 +6,6 @@
 
 #include <cstddef>
 #include <type_traits>
-
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -17,7 +16,8 @@ namespace GPU {
 // Returns index corresponding to the Regs member labeled by field_name
 // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
 //       when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
-//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       For details cf.
+//       https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
 //       Hopefully, this will be fixed sometime in the future.
 //       For lack of better alternatives, we currently hardcode the offsets when constant
 //       expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -30,8 +30,9 @@ namespace GPU {
 //       really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
 //       and then performs a (no-op) cast to size_t iff the second argument matches the expected
 //       field offset. Otherwise, the compiler will fail to compile this code.
-#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
-    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index)                              \
+    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \
+         GPU_REG_INDEX(field_name))
 #endif
 
 // MMIO region 0x1EFxxxxx
@@ -44,18 +45,18 @@ struct Regs {
 //       support for that.
 #define ASSERT_MEMBER_SIZE(name, size_in_bytes)
 #else
-#define ASSERT_MEMBER_SIZE(name, size_in_bytes)  \
-    static_assert(sizeof(name) == size_in_bytes, \
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)                                                    \
+    static_assert(sizeof(name) == size_in_bytes,                                                   \
                   "Structure size and register block length don't match")
 #endif
 
     // Components are laid out in reverse byte order, most significant bits first.
     enum class PixelFormat : u32 {
-        RGBA8  = 0,
-        RGB8   = 1,
+        RGBA8 = 0,
+        RGB8 = 1,
         RGB565 = 2,
         RGB5A1 = 3,
-        RGBA4  = 4,
+        RGBA4 = 4,
     };
 
     /**
@@ -88,8 +89,8 @@ struct Regs {
             BitField<0, 16, u32> value_16bit;
 
             // TODO: Verify component order
-            BitField< 0, 8, u32> value_24bit_r;
-            BitField< 8, 8, u32> value_24bit_g;
+            BitField<0, 8, u32> value_24bit_r;
+            BitField<8, 8, u32> value_24bit_g;
             BitField<16, 8, u32> value_24bit_b;
         };
 
@@ -126,7 +127,7 @@ struct Regs {
         union {
             u32 size;
 
-            BitField< 0, 16, u32> width;
+            BitField<0, 16, u32> width;
             BitField<16, 16, u32> height;
         };
 
@@ -138,7 +139,7 @@ struct Regs {
         union {
             u32 format;
 
-            BitField< 0, 3, PixelFormat> color_format;
+            BitField<0, 3, PixelFormat> color_format;
         };
 
         INSERT_PADDING_WORDS(0x1);
@@ -180,35 +181,37 @@ struct Regs {
         union {
             u32 output_size;
 
-            BitField< 0, 16, u32> output_width;
+            BitField<0, 16, u32> output_width;
             BitField<16, 16, u32> output_height;
         };
 
         union {
             u32 input_size;
 
-            BitField< 0, 16, u32> input_width;
+            BitField<0, 16, u32> input_width;
             BitField<16, 16, u32> input_height;
         };
 
         enum ScalingMode : u32 {
-            NoScale  = 0,  // Doesn't scale the image
-            ScaleX   = 1,  // Downscales the image in half in the X axis and applies a box filter
-            ScaleXY  = 2,  // Downscales the image in half in both the X and Y axes and applies a box filter
+            NoScale = 0, // Doesn't scale the image
+            ScaleX = 1,  // Downscales the image in half in the X axis and applies a box filter
+            ScaleXY =
+                2, // Downscales the image in half in both the X and Y axes and applies a box filter
         };
 
         union {
             u32 flags;
 
-            BitField< 0, 1, u32> flip_vertically;  // flips input data vertically
-            BitField< 1, 1, u32> input_linear;     // Converts from linear to tiled format
-            BitField< 2, 1, u32> crop_input_lines;
-            BitField< 3, 1, u32> is_texture_copy;  // Copies the data without performing any processing and respecting texture copy fields
-            BitField< 5, 1, u32> dont_swizzle;
-            BitField< 8, 3, PixelFormat> input_format;
+            BitField<0, 1, u32> flip_vertically; // flips input data vertically
+            BitField<1, 1, u32> input_linear;    // Converts from linear to tiled format
+            BitField<2, 1, u32> crop_input_lines;
+            BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
+                                                 // processing and respecting texture copy fields
+            BitField<5, 1, u32> dont_swizzle;
+            BitField<8, 3, PixelFormat> input_format;
             BitField<12, 3, PixelFormat> output_format;
             /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
-            BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
+            BitField<16, 1, u32> block_32;        // TODO(yuriks): unimplemented
             BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
         };
 
@@ -225,14 +228,14 @@ struct Regs {
             union {
                 u32 input_size;
 
-                BitField< 0, 16, u32> input_width;
+                BitField<0, 16, u32> input_width;
                 BitField<16, 16, u32> input_gap;
             };
 
             union {
                 u32 output_size;
 
-                BitField< 0, 16, u32> output_width;
+                BitField<0, 16, u32> output_width;
                 BitField<16, 16, u32> output_gap;
             };
         } texture_copy;
@@ -267,12 +270,12 @@ struct Regs {
         return sizeof(Regs) / sizeof(u32);
     }
 
-    const u32& operator [] (int index) const {
+    const u32& operator[](int index) const {
         const u32* content = reinterpret_cast<const u32*>(this);
         return content[index];
     }
 
-    u32& operator [] (int index) {
+    u32& operator[](int index) {
         u32* content = reinterpret_cast<u32*>(this);
         return content[index];
     }
@@ -294,28 +297,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
 //       is technically allowed since C++11. This macro should be enabled once MSVC adds
 //       support for that.
 #ifndef _MSC_VER
-#define ASSERT_REG_POSITION(field_name, position)             \
-    static_assert(offsetof(Regs, field_name) == position * 4, \
-                  "Field "#field_name" has invalid position")
-
-ASSERT_REG_POSITION(memory_fill_config[0],    0x00004);
-ASSERT_REG_POSITION(memory_fill_config[1],    0x00008);
-ASSERT_REG_POSITION(framebuffer_config[0],    0x00117);
-ASSERT_REG_POSITION(framebuffer_config[1],    0x00157);
-ASSERT_REG_POSITION(display_transfer_config,  0x00300);
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(memory_fill_config[0], 0x00004);
+ASSERT_REG_POSITION(memory_fill_config[1], 0x00008);
+ASSERT_REG_POSITION(framebuffer_config[0], 0x00117);
+ASSERT_REG_POSITION(framebuffer_config[1], 0x00157);
+ASSERT_REG_POSITION(display_transfer_config, 0x00300);
 ASSERT_REG_POSITION(command_processor_config, 0x00638);
 
 #undef ASSERT_REG_POSITION
 #endif // !defined(_MSC_VER)
 
-// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
+// anyway.
 static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
 
 extern Regs g_regs;
 extern bool g_skip_frame;
 
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 
 template <typename T>
 void Write(u32 addr, const T data);
@@ -326,5 +330,4 @@ void Init();
 /// Shutdown hardware
 void Shutdown();
 
-
 } // namespace
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index b5fdbf9c1..9ff8825b2 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -4,15 +4,14 @@
 
 #include "common/common_types.h"
 #include "common/logging/log.h"
-
-#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
+#include "core/hw/hw.h"
 #include "core/hw/lcd.h"
 
 namespace HW {
 
 template <typename T>
-inline void Read(T &var, const u32 addr) {
+inline void Read(T& var, const u32 addr) {
     switch (addr & 0xFFFFF000) {
     case VADDR_GPU:
     case VADDR_GPU + 0x1000:
@@ -71,10 +70,10 @@ inline void Write(u32 addr, const T data) {
 
 // Explicitly instantiate template functions because we aren't defining this in the header:
 
-template void Read<u64>(u64 &var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
@@ -82,8 +81,7 @@ template void Write<u16>(u32 addr, const u16 data);
 template void Write<u8>(u32 addr, const u8 data);
 
 /// Update hardware
-void Update() {
-}
+void Update() {}
 
 /// Initialize hardware
 void Init() {
@@ -98,5 +96,4 @@ void Shutdown() {
     LCD::Shutdown();
     LOG_DEBUG(HW, "shutdown OK");
 }
-
 }
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h
index d65608910..a3c5d2ea3 100644
--- a/src/core/hw/hw.h
+++ b/src/core/hw/hw.h
@@ -10,30 +10,30 @@ namespace HW {
 
 /// Beginnings of IO register regions, in the user VA space.
 enum : u32 {
-    VADDR_HASH      = 0x1EC01000,
-    VADDR_CSND      = 0x1EC03000,
-    VADDR_DSP       = 0x1EC40000,
-    VADDR_PDN       = 0x1EC41000,
-    VADDR_CODEC     = 0x1EC41000,
-    VADDR_SPI       = 0x1EC42000,
-    VADDR_SPI_2     = 0x1EC43000,   // Only used under TWL_FIRM?
-    VADDR_I2C       = 0x1EC44000,
-    VADDR_CODEC_2   = 0x1EC45000,
-    VADDR_HID       = 0x1EC46000,
-    VADDR_GPIO      = 0x1EC47000,
-    VADDR_I2C_2     = 0x1EC48000,
-    VADDR_SPI_3     = 0x1EC60000,
-    VADDR_I2C_3     = 0x1EC61000,
-    VADDR_MIC       = 0x1EC62000,
-    VADDR_PXI       = 0x1EC63000,
-    VADDR_LCD       = 0x1ED02000,
-    VADDR_DSP_2     = 0x1ED03000,
-    VADDR_HASH_2    = 0x1EE01000,
-    VADDR_GPU       = 0x1EF00000,
+    VADDR_HASH = 0x1EC01000,
+    VADDR_CSND = 0x1EC03000,
+    VADDR_DSP = 0x1EC40000,
+    VADDR_PDN = 0x1EC41000,
+    VADDR_CODEC = 0x1EC41000,
+    VADDR_SPI = 0x1EC42000,
+    VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM?
+    VADDR_I2C = 0x1EC44000,
+    VADDR_CODEC_2 = 0x1EC45000,
+    VADDR_HID = 0x1EC46000,
+    VADDR_GPIO = 0x1EC47000,
+    VADDR_I2C_2 = 0x1EC48000,
+    VADDR_SPI_3 = 0x1EC60000,
+    VADDR_I2C_3 = 0x1EC61000,
+    VADDR_MIC = 0x1EC62000,
+    VADDR_PXI = 0x1EC63000,
+    VADDR_LCD = 0x1ED02000,
+    VADDR_DSP_2 = 0x1ED03000,
+    VADDR_HASH_2 = 0x1EE01000,
+    VADDR_GPU = 0x1EF00000,
 };
 
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 
 template <typename T>
 void Write(u32 addr, const T data);
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 6f93709e3..2aa89de18 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -3,13 +3,10 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
-
 #include "common/common_types.h"
 #include "common/logging/log.h"
-
 #include "core/hw/hw.h"
 #include "core/hw/lcd.h"
-
 #include "core/tracer/recorder.h"
 #include "video_core/debug_utils/debug_utils.h"
 
@@ -18,7 +15,7 @@ namespace LCD {
 Regs g_regs;
 
 template <typename T>
-inline void Read(T &var, const u32 raw_addr) {
+inline void Read(T& var, const u32 raw_addr) {
     u32 addr = raw_addr - HW::VADDR_LCD;
     u32 index = addr / 4;
 
@@ -48,16 +45,17 @@ inline void Write(u32 addr, const T data) {
     // This is happening *after* handling the write to make sure we properly catch all memory reads.
     if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
         // addr + GPU VBase - IO VBase + IO PBase
-        Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
+        Pica::g_debug_context->recorder->RegisterWritten<T>(
+            addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
     }
 }
 
 // Explicitly instantiate template functions because we aren't defining this in the header:
 
-template void Read<u64>(u64 &var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 57029c5e8..191fd44af 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -6,7 +6,6 @@
 
 #include <cstddef>
 #include <type_traits>
-
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -42,16 +41,15 @@ struct Regs {
         return sizeof(Regs) / sizeof(u32);
     }
 
-    const u32& operator [] (int index) const {
+    const u32& operator[](int index) const {
         const u32* content = reinterpret_cast<const u32*>(this);
         return content[index];
     }
 
-    u32& operator [] (int index) {
+    u32& operator[](int index) {
         u32* content = reinterpret_cast<u32*>(this);
         return content[index];
     }
-
 };
 static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
 
@@ -59,14 +57,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
 //       is technically allowed since C++11. This macro should be enabled once MSVC adds
 //       support for that.
 #ifndef _MSC_VER
-#define ASSERT_REG_POSITION(field_name, position) \
-    static_assert(offsetof(Regs, field_name) == position * 4, \
-              "Field "#field_name" has invalid position")
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
+                  "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(color_fill_top,    0x81);
-ASSERT_REG_POSITION(backlight_top,     0x90);
+ASSERT_REG_POSITION(color_fill_top, 0x81);
+ASSERT_REG_POSITION(backlight_top, 0x90);
 ASSERT_REG_POSITION(color_fill_bottom, 0x281);
-ASSERT_REG_POSITION(backlight_bottom,  0x290);
+ASSERT_REG_POSITION(backlight_bottom, 0x290);
 
 #undef ASSERT_REG_POSITION
 #endif // !defined(_MSC_VER)
@@ -74,7 +72,7 @@ ASSERT_REG_POSITION(backlight_bottom,  0x290);
 extern Regs g_regs;
 
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 
 template <typename T>
 void Write(u32 addr, const T data);
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 083391e83..6a6c707a2 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -6,13 +6,11 @@
 #include <array>
 #include <cstddef>
 #include <memory>
-
 #include "common/assert.h"
 #include "common/color.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
 #include "common/vector_math.h"
-
 #include "core/hle/service/y2r_u.h"
 #include "core/hw/y2r.h"
 #include "core/memory.h"
@@ -27,9 +25,9 @@ static const size_t TILE_SIZE = 8 * 8;
 using ImageTile = std::array<u32, TILE_SIZE>;
 
 /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
-static void ConvertYUVToRGB(InputFormat input_format,
-        const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[],
-        unsigned int width, unsigned int height, const CoefficientSet& coefficients) {
+static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
+                            const u8* input_V, ImageTile output[], unsigned int width,
+                            unsigned int height, const CoefficientSet& coefficients) {
 
     for (unsigned int y = 0; y < height; ++y) {
         for (unsigned int x = 0; x < width; ++x) {
@@ -58,11 +56,11 @@ static void ConvertYUVToRGB(InputFormat input_format,
 
             // This conversion process is bit-exact with hardware, as far as could be tested.
             auto& c = coefficients;
-            s32 cY = c[0]*Y;
+            s32 cY = c[0] * Y;
 
-            s32 r = cY          + c[1]*V;
-            s32 g = cY - c[3]*U - c[2]*V;
-            s32 b = cY + c[4]*U;
+            s32 r = cY + c[1] * V;
+            s32 g = cY - c[2] * V - c[3] * U;
+            s32 b = cY + c[4] * U;
 
             const s32 rounding_offset = 0x18;
             r = (r >> 3) + c[5] + rounding_offset;
@@ -74,14 +72,14 @@ static void ConvertYUVToRGB(InputFormat input_format,
             u32* out = &output[tile][y * 8 + tile_x];
 
             using MathUtil::Clamp;
-            *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) |
-                   ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
+            *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
                    ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
         }
     }
 }
 
-/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
+/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
+/// formats to 8-bit.
 template <size_t N>
 static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
     const u8* input = Memory::GetPointer(buf.address);
@@ -103,9 +101,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data
     }
 }
 
-/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
+/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
+/// transfer.
 static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
-        OutputFormat output_format, u8 alpha) {
+                     OutputFormat output_format, u8 alpha) {
 
     u8* output = Memory::GetPointer(buf.address);
 
@@ -113,9 +112,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
         u8* unit_end = output + buf.transfer_unit;
         while (output < unit_end) {
             u32 color = *input++;
-            Math::Vec4<u8> col_vec{
-                (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
-            };
+            Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
 
             switch (output_format) {
             case OutputFormat::RGBA8:
@@ -145,7 +142,8 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
     }
 }
 
-static const u8 linear_lut[64] = {
+static const u8 linear_lut[TILE_SIZE] = {
+    // clang-format off
      0,  1,  2,  3,  4,  5,  6,  7,
      8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23,
@@ -154,9 +152,11 @@ static const u8 linear_lut[64] = {
     40, 41, 42, 43, 44, 45, 46, 47,
     48, 49, 50, 51, 52, 53, 54, 55,
     56, 57, 58, 59, 60, 61, 62, 63,
+    // clang-format on
 };
 
-static const u8 morton_lut[64] = {
+static const u8 morton_lut[TILE_SIZE] = {
+    // clang-format off
      0,  1,  4,  5, 16, 17, 20, 21,
      2,  3,  6,  7, 18, 19, 22, 23,
      8,  9, 12, 13, 24, 25, 28, 29,
@@ -165,15 +165,18 @@ static const u8 morton_lut[64] = {
     34, 35, 38, 39, 50, 51, 54, 55,
     40, 41, 44, 45, 56, 57, 60, 61,
     42, 43, 46, 47, 58, 59, 62, 63,
+    // clang-format on
 };
 
-static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
+                        const u8 out_map[64]) {
     for (int i = 0; i < height * 8; ++i) {
         output[out_map[i]] = input[i];
     }
 }
 
-static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
+                         const u8 out_map[64]) {
     int out_i = 0;
     for (int x = 0; x < 8; ++x) {
         for (int y = height - 1; y >= 0; --y) {
@@ -182,16 +185,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
     }
 }
 
-static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
+                          const u8 out_map[64]) {
     int out_i = 0;
     for (int i = height * 8 - 1; i >= 0; --i) {
         output[out_map[out_i++]] = input[i];
     }
 }
 
-static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
+                          const u8 out_map[64]) {
     int out_i = 0;
-    for (int x = 8-1; x >= 0; --x) {
+    for (int x = 8 - 1; x >= 0; --x) {
         for (int y = 0; y < height; ++y) {
             output[out_map[out_i++]] = input[y * 8 + x];
         }
@@ -274,9 +279,11 @@ void PerformConversion(ConversionConfiguration& cvt) {
     const u8* tile_remap = nullptr;
     switch (cvt.block_alignment) {
     case BlockAlignment::Linear:
-        tile_remap = linear_lut; break;
+        tile_remap = linear_lut;
+        break;
     case BlockAlignment::Block8x8:
-        tile_remap = morton_lut; break;
+        tile_remap = morton_lut;
+        break;
     }
 
     for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
@@ -320,7 +327,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
         // Note(yuriks): If additional optimization is required, input_format can be moved to a
         // template parameter, so that its dispatch can be moved to outside the inner loop.
         ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
-                cvt.input_line_width, row_height, cvt.coefficients);
+                        cvt.input_line_width, row_height, cvt.coefficients);
 
         u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
 
@@ -367,9 +374,9 @@ void PerformConversion(ConversionConfiguration& cvt) {
 
         // Note(yuriks): If additional optimization is required, output_format can be moved to a
         // template parameter, so that its dispatch can be moved to outside the inner loop.
-        SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha);
+        SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
+                 cvt.output_format, (u8)cvt.alpha);
     }
 }
-
 }
 }
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
index 729e1eee3..6b6e71bec 100644
--- a/src/core/hw/y2r.h
+++ b/src/core/hw/y2r.h
@@ -3,13 +3,12 @@
 // Refer to the license.txt file included.
 
 namespace Y2R_U {
-    struct ConversionConfiguration;
+struct ConversionConfiguration;
 }
 
 namespace HW {
 namespace Y2R {
 
 void PerformConversion(Y2R_U::ConversionConfiguration& cvt);
-
 }
 }