6 files changed, 305 insertions, 350 deletions
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp
index e241b31c8..635f50a53 100644
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@@ -32,7 +32,7 @@ static inline u8* GetCommandBuffer(u32 thread_id) {
     if (0 == g_shared_memory)
         return nullptr;
 
-    return Kernel::GetSharedMemoryPointer(g_shared_memory, 
+    return Kernel::GetSharedMemoryPointer(g_shared_memory,
         0x800 + (thread_id * sizeof(CommandBuffer)));
 }
 
@@ -173,11 +173,11 @@ void ExecuteCommand(const Command& command) {
     case CommandId::SET_COMMAND_LIST_LAST:
     {
         auto& params = command.set_command_list_last;
-        WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3);
-        WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), Memory::VirtualToPhysicalAddress(params.address) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3);
 
         // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
-        WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1);
 
         // TODO: Move this to GPU
         // TODO: Not sure what units the size is measured in
@@ -193,20 +193,28 @@ void ExecuteCommand(const Command& command) {
     case CommandId::SET_MEMORY_FILL:
     {
         auto& params = command.memory_fill;
-        WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1);
-        WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1);
-
-        WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2);
-        WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), Memory::VirtualToPhysicalAddress(params.start1) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), Memory::VirtualToPhysicalAddress(params.end1) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1);
+
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), Memory::VirtualToPhysicalAddress(params.start2) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), Memory::VirtualToPhysicalAddress(params.end2) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2);
         break;
     }
 
-    // TODO: Check if texture copies are implemented correctly..
     case CommandId::SET_DISPLAY_TRANSFER:
+    {
+        auto& params = command.image_copy;
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
+
         // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
         // work well enough for running demos. Need to figure out how these all work and trigger
         // them correctly.
@@ -216,19 +224,20 @@ void ExecuteCommand(const Command& command) {
         SignalInterrupt(InterruptId::P3D);
         SignalInterrupt(InterruptId::DMA);
         break;
+    }
 
+    // TODO: Check if texture copies are implemented correctly..
     case CommandId::SET_TEXTURE_COPY:
     {
         auto& params = command.image_copy;
-        WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags);
-
-        // TODO: Should this only be ORed with 1 for texture copies?
-        // trigger transfer
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
+
+        // TODO: Should this register be set to 1, or should its value be OR-ed with 1 instead?
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
         break;
     }
 
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index d94c2329b..87cf93bac 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -14,106 +14,29 @@
 
 #include "core/hw/gpu.h"
 
+#include "video_core/command_processor.h"
 #include "video_core/video_core.h"
 
 
 namespace GPU {
 
-RegisterSet<u32, Regs> g_regs;
+Regs g_regs;
 
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
 
-/**
- * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
- * @param
- */
-void SetFramebufferLocation(const FramebufferLocation mode) {
-    switch (mode) {
-    case FRAMEBUFFER_LOCATION_FCRAM:
-    {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
-
-        framebuffer_top.address_left1  = PADDR_TOP_LEFT_FRAME1;
-        framebuffer_top.address_left2  = PADDR_TOP_LEFT_FRAME2;
-        framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1;
-        framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2;
-        framebuffer_sub.address_left1  = PADDR_SUB_FRAME1;
-        //framebuffer_sub.address_left2  = unknown;
-        framebuffer_sub.address_right1 = PADDR_SUB_FRAME2;
-        //framebuffer_sub.address_right2 = unknown;
-        break;
-    }
-
-    case FRAMEBUFFER_LOCATION_VRAM:
-    {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
-
-        framebuffer_top.address_left1  = PADDR_VRAM_TOP_LEFT_FRAME1;
-        framebuffer_top.address_left2  = PADDR_VRAM_TOP_LEFT_FRAME2;
-        framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1;
-        framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2;
-        framebuffer_sub.address_left1  = PADDR_VRAM_SUB_FRAME1;
-        //framebuffer_sub.address_left2  = unknown;
-        framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2;
-        //framebuffer_sub.address_right2 = unknown;
-        break;
-    }
-    }
-}
-
-/**
- * Gets the location of the framebuffers
- * @return Location of framebuffers as FramebufferLocation enum
- */
-FramebufferLocation GetFramebufferLocation(u32 address) {
-    if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) {
-        return FRAMEBUFFER_LOCATION_VRAM;
-    } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) {
-        return FRAMEBUFFER_LOCATION_FCRAM;
-    } else {
-        ERROR_LOG(GPU, "unknown framebuffer location!");
-    }
-    return FRAMEBUFFER_LOCATION_UNKNOWN;
-}
-
-u32 GetFramebufferAddr(const u32 address) {
-    switch (GetFramebufferLocation(address)) {
-    case FRAMEBUFFER_LOCATION_FCRAM:
-        return Memory::VirtualAddressFromPhysical_FCRAM(address);
-    case FRAMEBUFFER_LOCATION_VRAM:
-        return Memory::VirtualAddressFromPhysical_VRAM(address);
-    default:
-        ERROR_LOG(GPU, "unknown framebuffer location");
-    }
-    return 0;
-}
-
-/**
- * Gets a read-only pointer to a framebuffer in memory
- * @param address Physical address of framebuffer
- * @return Returns const pointer to raw framebuffer
- */
-const u8* GetFramebufferPointer(const u32 address) {
-    u32 addr = GetFramebufferAddr(address);
-    return (addr != 0) ? Memory::GetPointer(addr) : nullptr;
-}
-
 template <typename T>
 inline void Read(T &var, const u32 raw_addr) {
     u32 addr = raw_addr - 0x1EF00000;
     int index = addr / 4;
 
     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr);
         return;
     }
 
-    var = g_regs[static_cast<Regs::Id>(addr / 4)];
+    var = g_regs[addr / 4];
 }
 
 template <typename T>
@@ -122,28 +45,28 @@ inline void Write(u32 addr, const T data) {
     int index = addr / 4;
 
     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
         return;
     }
 
-    g_regs[static_cast<Regs::Id>(index)] = data;
+    g_regs[index] = data;
 
-    switch (static_cast<Regs::Id>(index)) {
+    switch (index) {
 
     // Memory fills are triggered once the fill value is written.
     // NOTE: This is not verified.
-    case Regs::MemoryFill + 3:
-    case Regs::MemoryFill + 7:
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3):
     {
-        const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3));
+        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value));
+        const auto& config = g_regs.memory_fill_config[is_second_filler];
 
         // TODO: Not sure if this check should be done at GSP level instead
         if (config.address_start) {
             // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
-            u32* start = (u32*)Memory::GetPointer(config.GetStartAddress());
-            u32* end = (u32*)Memory::GetPointer(config.GetEndAddress());
+            u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress()));
+            u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress()));
             for (u32* ptr = start; ptr < end; ++ptr)
                 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
 
@@ -152,12 +75,12 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::DisplayTransfer + 6:
+    case GPU_REG_INDEX(display_transfer_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::DisplayTransfer>();
+        const auto& config = g_regs.display_transfer_config;
         if (config.trigger & 1) {
-            u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress());
-            u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress());
+            u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress()));
+            u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress()));
 
             for (int y = 0; y < config.output_height; ++y) {
                 // TODO: Why does the register seem to hold twice the framebuffer width?
@@ -221,14 +144,15 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::CommandProcessor + 4:
+    // Seems like writing to this register triggers processing
+    case GPU_REG_INDEX(command_processor_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::CommandProcessor>();
+        const auto& config = g_regs.command_processor_config;
         if (config.trigger & 1)
         {
-            // u32* buffer = (u32*)Memory::GetPointer(config.address << 3);
-            ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3);
-            // TODO: Process command list!
+            u32* buffer = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalAddress()));
+            u32 size = config.size << 3;
+            Pica::CommandProcessor::ProcessCommandList(buffer, size);
         }
         break;
     }
@@ -252,7 +176,7 @@ template void Write<u8>(u32 addr, const u8 data);
 
 /// Update hardware
 void Update() {
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
     u64 current_ticks = Core::g_app_core->GetTicks();
 
     // Synchronize line...
@@ -277,11 +201,22 @@ void Init() {
     g_cur_line = 0;
     g_last_line_ticks = Core::g_app_core->GetTicks();
 
-//    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
-    SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
+    auto& framebuffer_sub = g_regs.framebuffer_config[1];
+
+    // Set up default framebuffer addresses (located in VRAM),
+    // or at least the ones used by system applets.
+    // There's probably a smarter way to come up with addresses
+    // like these which does not require hardcoding.
+    framebuffer_top.address_left1  = 0x181E6000;
+    framebuffer_top.address_left2  = 0x1822C800;
+    framebuffer_top.address_right1 = 0x18273000;
+    framebuffer_top.address_right2 = 0x182B9800;
+    framebuffer_sub.address_left1  = 0x1848F000;
+    //framebuffer_sub.address_left2  = unknown;
+    framebuffer_sub.address_right1 = 0x184C7800;
+    //framebuffer_sub.address_right2 = unknown;
 
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-    auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
     // TODO: Width should be 240 instead?
     framebuffer_top.width = 480;
     framebuffer_top.height = 400;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 42f18a0e7..d20311a00 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -4,32 +4,57 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include "common/common_types.h"
 #include "common/bit_field.h"
-#include "common/register_set.h"
 
 namespace GPU {
 
 static const u32 kFrameCycles   = 268123480 / 60;   ///< 268MHz / 60 frames per second
 static const u32 kFrameTicks    = kFrameCycles / 3; ///< Approximate number of instructions/frame
 
+// Returns the index corresponding to the Regs member labeled by field_name.
+// TODO: Due to Visual Studio bug 209229, offsetof does not produce constant expressions
+//       when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
+//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       Hopefully, this will be fixed sometime in the future.
+//       For lack of better alternatives, we currently hardcode the offsets when constant
+//       expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, a compile-time
+//       check will then make sure the offsets indeed match the automatically calculated ones).
+#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32))
+#if defined(_MSC_VER)
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
+#else
+// NOTE: Hacking in an enable_if-based compile-time check just to work around the lacking MSVC
+//       compiler really is this annoying. This macro forwards its first argument to GPU_REG_INDEX
+//       and then performs a (no-op) cast to size_t iff the second argument matches the expected
+//       field offset. Otherwise, the compiler will fail to compile this code.
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
+    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
+#endif
+
 // MMIO region 0x1EFxxxxx
 struct Regs {
-    enum Id : u32 {
-        MemoryFill                = 0x00004, // + 5,6,7; second block at 8-11
-
-        FramebufferTop            = 0x00117, // + 11a,11b,11c,11d(?),11e...126
-        FramebufferBottom         = 0x00157, // + 15a,15b,15c,15d(?),15e...166
 
-        DisplayTransfer           = 0x00300, // + 301,302,303,304,305,306
-
-        CommandProcessor          = 0x00638, // + 63a,63c
-
-        NumIds                    = 0x01000
-    };
-
-    template<Id id>
-    struct Struct;
+// Helper macro to insert padding words so that structure members end up at the proper offsets.
+// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121";
+// the name is derived from the current source line (__LINE__) to make sure it is unique.
+#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
+#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
+#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
+
+// helper macro to make sure the defined structures are of the expected size.
+#if defined(_MSC_VER)
+// TODO: MSVC does not support using sizeof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
+#else
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)  \
+    static_assert(sizeof(name) == size_in_bytes, \
+                  "Structure size and register block length don't match");
+#endif
 
     enum class FramebufferFormat : u32 {
         RGBA8  = 0,
@@ -38,201 +63,191 @@ struct Regs {
         RGB5A1 = 3,
         RGBA4  = 4,
     };
-};
 
-template<>
-struct Regs::Struct<Regs::MemoryFill> {
-    u32 address_start;
-    u32 address_end; // ?
-    u32 size;
-    u32 value; // ?
+    INSERT_PADDING_WORDS(0x4);
 
-    inline u32 GetStartAddress() const {
-        return address_start * 8;
-    }
+    struct {
+        u32 address_start;
+        u32 address_end; // ?
+        u32 size;
+        u32 value; // ?
 
-    inline u32 GetEndAddress() const {
-        return address_end * 8;
-    }
-};
-static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match");
+        inline u32 GetStartAddress() const {
+            return DecodeAddressRegister(address_start);
+        }
 
-template<>
-struct Regs::Struct<Regs::FramebufferTop> {
-    using Format = Regs::FramebufferFormat;
+        inline u32 GetEndAddress() const {
+            return DecodeAddressRegister(address_end);
+        }
+    } memory_fill_config[2];
+    ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10);
 
-    union {
-        u32 size;
+    INSERT_PADDING_WORDS(0x10b);
 
-        BitField< 0, 16, u32> width;
-        BitField<16, 16, u32> height;
-    };
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    u32 pad0[2];
+        union {
+            u32 size;
 
-    u32 address_left1;
-    u32 address_left2;
+            BitField< 0, 16, u32> width;
+            BitField<16, 16, u32> height;
+        };
 
-    union {
-        u32 format;
+        INSERT_PADDING_WORDS(0x2);
 
-        BitField< 0, 3, Format> color_format;
-    };
+        u32 address_left1;
+        u32 address_left2;
 
-    u32 pad1;
+        union {
+            u32 format;
 
-    union {
-        u32 active_fb;
+            BitField< 0, 3, Format> color_format;
+        };
 
-        // 0: Use parameters ending with "1"
-        // 1: Use parameters ending with "2"
-        BitField<0, 1, u32> second_fb_active;
-    };
+        INSERT_PADDING_WORDS(0x1);
 
-    u32 pad2[5];
+        union {
+            u32 active_fb;
 
-    // Distance between two pixel rows, in bytes
-    u32 stride;
+            // 0: Use parameters ending with "1"
+            // 1: Use parameters ending with "2"
+            BitField<0, 1, u32> second_fb_active;
+        };
 
-    u32 address_right1;
-    u32 address_right2;
-};
+        INSERT_PADDING_WORDS(0x5);
 
-template<>
-struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> {
-};
-static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match");
+        // Distance between two pixel rows, in bytes
+        u32 stride;
 
-template<>
-struct Regs::Struct<Regs::DisplayTransfer> {
-    using Format = Regs::FramebufferFormat;
+        u32 address_right1;
+        u32 address_right2;
 
-    u32 input_address;
-    u32 output_address;
+        INSERT_PADDING_WORDS(0x30);
+    } framebuffer_config[2];
+    ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100);
 
-    inline u32 GetPhysicalInputAddress() const {
-        return input_address * 8;
-    }
+    INSERT_PADDING_WORDS(0x169);
 
-    inline u32 GetPhysicalOutputAddress() const {
-        return output_address * 8;
-    }
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    union {
-        u32 output_size;
+        u32 input_address;
+        u32 output_address;
 
-        BitField< 0, 16, u32> output_width;
-        BitField<16, 16, u32> output_height;
-    };
+        inline u32 GetPhysicalInputAddress() const {
+            return DecodeAddressRegister(input_address);
+        }
 
-    union {
-        u32 input_size;
+        inline u32 GetPhysicalOutputAddress() const {
+            return DecodeAddressRegister(output_address);
+        }
 
-        BitField< 0, 16, u32> input_width;
-        BitField<16, 16, u32> input_height;
-    };
+        union {
+            u32 output_size;
 
-    union {
-        u32 flags;
+            BitField< 0, 16, u32> output_width;
+            BitField<16, 16, u32> output_height;
+        };
 
-        BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
-        BitField< 8, 3, Format> input_format;
-        BitField<12, 3, Format> output_format;
-        BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
-    };
+        union {
+            u32 input_size;
 
-    u32 unknown;
+            BitField< 0, 16, u32> input_width;
+            BitField<16, 16, u32> input_height;
+        };
 
-    // it seems that writing to this field triggers the display transfer
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match");
+        union {
+            u32 flags;
 
-template<>
-struct Regs::Struct<Regs::CommandProcessor> {
-    // command list size
-    u32 size;
+            BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
+            BitField< 8, 3, Format> input_format;
+            BitField<12, 3, Format> output_format;
+            BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
+        };
 
-    u32 pad0;
+        INSERT_PADDING_WORDS(0x1);
 
-    // command list address
-    u32 address;
+        // it seems that writing to this field triggers the display transfer
+        u32 trigger;
+    } display_transfer_config;
+    ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
 
-    u32 pad1;
+    INSERT_PADDING_WORDS(0x331);
 
-    // it seems that writing to this field triggers command list processing
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match");
-
-
-extern RegisterSet<u32, Regs> g_regs;
-
-enum {
-    TOP_ASPECT_X        = 0x5,
-    TOP_ASPECT_Y        = 0x3,
-
-    TOP_HEIGHT          = 240,
-    TOP_WIDTH           = 400,
-    BOTTOM_WIDTH        = 320,
-
-    // Physical addresses in FCRAM (chosen arbitrarily)
-    PADDR_TOP_LEFT_FRAME1       = 0x201D4C00,
-    PADDR_TOP_LEFT_FRAME2       = 0x202D4C00,
-    PADDR_TOP_RIGHT_FRAME1      = 0x203D4C00,
-    PADDR_TOP_RIGHT_FRAME2      = 0x204D4C00,
-    PADDR_SUB_FRAME1            = 0x205D4C00,
-    PADDR_SUB_FRAME2            = 0x206D4C00,
-    // Physical addresses in FCRAM used by ARM9 applications
-/*    PADDR_TOP_LEFT_FRAME1       = 0x20184E60,
-    PADDR_TOP_LEFT_FRAME2       = 0x201CB370,
-    PADDR_TOP_RIGHT_FRAME1      = 0x20282160,
-    PADDR_TOP_RIGHT_FRAME2      = 0x202C8670,
-    PADDR_SUB_FRAME1            = 0x202118E0,
-    PADDR_SUB_FRAME2            = 0x20249CF0,*/
-
-    // Physical addresses in VRAM
-    // TODO: These should just be deduced from the ones above
-    PADDR_VRAM_TOP_LEFT_FRAME1  = 0x181D4C00,
-    PADDR_VRAM_TOP_LEFT_FRAME2  = 0x182D4C00,
-    PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00,
-    PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00,
-    PADDR_VRAM_SUB_FRAME1       = 0x185D4C00,
-    PADDR_VRAM_SUB_FRAME2       = 0x186D4C00,
-    // Physical addresses in VRAM used by ARM9 applications
-/*    PADDR_VRAM_TOP_LEFT_FRAME2  = 0x181CB370,
-    PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160,
-    PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670,
-    PADDR_VRAM_SUB_FRAME1       = 0x182118E0,
-    PADDR_VRAM_SUB_FRAME2       = 0x18249CF0,*/
-};
+    struct {
+        // command list size
+        u32 size;
 
-/// Framebuffer location
-enum FramebufferLocation {
-    FRAMEBUFFER_LOCATION_UNKNOWN,   ///< Framebuffer location is unknown
-    FRAMEBUFFER_LOCATION_FCRAM,     ///< Framebuffer is in the GSP heap
-    FRAMEBUFFER_LOCATION_VRAM,      ///< Framebuffer is in VRAM
-};
+        INSERT_PADDING_WORDS(0x1);
+
+        // command list address
+        u32 address;
+
+        INSERT_PADDING_WORDS(0x1);
+
+        // it seems that writing to this field triggers command list processing
+        u32 trigger;
+
+        inline u32 GetPhysicalAddress() const {
+            return DecodeAddressRegister(address);
+        }
+    } command_processor_config;
+    ASSERT_MEMBER_SIZE(command_processor_config, 0x14);
 
-/**
- * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM
- * @param
- */
-void SetFramebufferLocation(const FramebufferLocation mode);
-
-/**
- * Gets a read-only pointer to a framebuffer in memory
- * @param address Physical address of framebuffer
- * @return Returns const pointer to raw framebuffer
- */
-const u8* GetFramebufferPointer(const u32 address);
-
-u32 GetFramebufferAddr(const u32 address);
-
-/**
- * Gets the location of the framebuffers
- */
-FramebufferLocation GetFramebufferLocation(u32 address);
+    INSERT_PADDING_WORDS(0x9c3);
+
+#undef INSERT_PADDING_WORDS_HELPER1
+#undef INSERT_PADDING_WORDS_HELPER2
+#undef INSERT_PADDING_WORDS
+
+    static inline int NumIds() {
+        return sizeof(Regs) / sizeof(u32);
+    }
+
+    u32& operator [] (int index) const {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+
+    u32& operator [] (int index) {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+
+private:
+    /*
+     * Most physical addresses which GPU registers refer to are 8-byte aligned.
+     * This function should be used to get the address from a raw register value.
+     */
+    static inline u32 DecodeAddressRegister(u32 register_value) {
+        return register_value * 8;
+    }
+};
+static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
+
+// TODO: MSVC does not support using offsetof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#ifndef _MSC_VER
+#define ASSERT_REG_POSITION(field_name, position)             \
+    static_assert(offsetof(Regs, field_name) == position * 4, \
+                  "Field "#field_name" has invalid position")
+
+ASSERT_REG_POSITION(memory_fill_config[0],    0x00004);
+ASSERT_REG_POSITION(memory_fill_config[1],    0x00008);
+ASSERT_REG_POSITION(framebuffer_config[0],    0x00117);
+ASSERT_REG_POSITION(framebuffer_config[1],    0x00157);
+ASSERT_REG_POSITION(display_transfer_config,  0x00300);
+ASSERT_REG_POSITION(command_processor_config, 0x00638);
+
+#undef ASSERT_REG_POSITION
+#endif // !defined(_MSC_VER)
+
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
+
+extern Regs g_regs;
 
 template <typename T>
 void Read(T &var, const u32 addr);
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
index c45746be9..14fc01471 100644
--- a/src/core/mem_map.cpp
+++ b/src/core/mem_map.cpp
@@ -72,14 +72,14 @@ void Init() {
 
     g_base = MemoryMap_Setup(g_views, kNumMemViews, flags, &g_arena);
 
-    NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap, 
+    NOTICE_LOG(MEMMAP, "initialized OK, RAM at %p (mirror at 0 @ %p)", g_heap,
         g_physical_fcram);
 }
 
 void Shutdown() {
     u32 flags = 0;
     MemoryMap_Shutdown(g_views, kNumMemViews, flags, &g_arena);
-    
+
     g_arena.ReleaseSpace();
     g_base = NULL;
 
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index 12941f558..3c7810573 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -14,7 +14,6 @@ namespace Memory {
 enum {
     BOOTROM_SIZE            = 0x00010000,   ///< Bootrom (super secret code/data @ 0x8000) size
     MPCORE_PRIV_SIZE        = 0x00002000,   ///< MPCore private memory region size
-    VRAM_SIZE               = 0x00600000,   ///< VRAM size
     DSP_SIZE                = 0x00080000,   ///< DSP memory size
     AXI_WRAM_SIZE           = 0x00080000,   ///< AXI WRAM size
 
@@ -23,8 +22,6 @@ enum {
     FCRAM_PADDR_END         = (FCRAM_PADDR + FCRAM_SIZE),       ///< FCRAM end of physical space
     FCRAM_VADDR             = 0x08000000,                       ///< FCRAM virtual address
     FCRAM_VADDR_END         = (FCRAM_VADDR + FCRAM_SIZE),       ///< FCRAM end of virtual space
-    FCRAM_VADDR_FW0B        = 0xF0000000,                       ///< FCRAM adress for firmare FW0B
-    FCRAM_VADDR_FW0B_END    = (FCRAM_VADDR_FW0B + FCRAM_SIZE),  ///< FCRAM adress end for FW0B
     FCRAM_MASK              = (FCRAM_SIZE - 1),                 ///< FCRAM mask
 
     SHARED_MEMORY_SIZE      = 0x04000000,   ///< Shared memory size
@@ -73,6 +70,7 @@ enum {
     HARDWARE_IO_PADDR_END   = (HARDWARE_IO_PADDR + HARDWARE_IO_SIZE),
     HARDWARE_IO_VADDR_END   = (HARDWARE_IO_VADDR + HARDWARE_IO_SIZE),
 
+    VRAM_SIZE               = 0x00600000,
     VRAM_PADDR              = 0x18000000,
     VRAM_VADDR              = 0x1F000000,
     VRAM_PADDR_END          = (VRAM_PADDR + VRAM_SIZE),
@@ -112,7 +110,7 @@ struct MemoryBlock {
 
 // In 64-bit, this might point to "high memory" (above the 32-bit limit),
 // so be sure to load it into a 64-bit register.
-extern u8 *g_base; 
+extern u8 *g_base;
 
 // These are guaranteed to point to "low memory" addresses (sub-32-bit).
 // 64-bit: Pointers to low-mem (sub-0x10000000) mirror
@@ -147,7 +145,7 @@ void Write32(const u32 addr, const u32 data);
 
 void WriteBlock(const u32 addr, const u8* data, const int size);
 
-u8* GetPointer(const u32 Address);
+u8* GetPointer(const u32 virtual_address);
 
 /**
  * Maps a block of memory on the heap
@@ -169,16 +167,10 @@ inline const char* GetCharPointer(const u32 address) {
     return (const char *)GetPointer(address);
 }
 
-inline const u32 VirtualAddressFromPhysical_FCRAM(const u32 address) {
-    return ((address & FCRAM_MASK) | FCRAM_VADDR);
-}
-
-inline const u32 VirtualAddressFromPhysical_IO(const u32 address) {
-    return (address + 0x0EB00000);
-}
+/// Converts a VRAM/FCRAM physical address to virtual; others are logged and returned as-is
+u32 PhysicalToVirtualAddress(const u32 addr);
 
-inline const u32 VirtualAddressFromPhysical_VRAM(const u32 address) {
-    return (address + 0x07000000);
-}
+/// Converts a VRAM/FCRAM virtual address to physical; others are logged and returned as-is
+u32 VirtualToPhysicalAddress(const u32 addr);
 
 } // namespace
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp
index 305be8468..5772cca52 100644
--- a/src/core/mem_map_funcs.cpp
+++ b/src/core/mem_map_funcs.cpp
@@ -17,37 +17,44 @@ std::map<u32, MemoryBlock> g_heap_map;
 std::map<u32, MemoryBlock> g_heap_gsp_map;
 std::map<u32, MemoryBlock> g_shared_map;
 
-/// Convert a physical address (or firmware-specific virtual address) to primary virtual address
-u32 _VirtualAddress(const u32 addr) {
-    // Our memory interface read/write functions assume virtual addresses. Put any physical address 
-    // to virtual address translations here. This is obviously quite hacky... But we're not doing 
-    // any MMU emulation yet or anything
-    if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) {
-        return VirtualAddressFromPhysical_FCRAM(addr);
-
-    // Virtual address mapping FW0B
-    } else if ((addr >= FCRAM_VADDR_FW0B) && (addr < FCRAM_VADDR_FW0B_END)) {
-        return VirtualAddressFromPhysical_FCRAM(addr);
-
-    // Hardware IO
-    // TODO(bunnei): FixMe
-    // This isn't going to work... The physical address of HARDWARE_IO conflicts with the virtual 
-    // address of shared memory.
-    //} else if ((addr >= HARDWARE_IO_PADDR) && (addr < HARDWARE_IO_PADDR_END)) {
-    //    return (addr + 0x0EB00000);
+/// Convert a physical address to virtual address
+u32 PhysicalToVirtualAddress(const u32 addr) {
+    // Our memory interface read/write functions assume virtual addresses. Put any physical address
+    // to virtual address translations here. This is quite hacky, but necessary until we implement
+    // proper MMU emulation. Addresses outside VRAM/FCRAM are logged and returned unchanged.
+    // TODO: Replace this with real address translation once MMU emulation is implemented.
+    if ((addr >= VRAM_PADDR) && (addr < VRAM_PADDR_END)) {
+        return addr - VRAM_PADDR + VRAM_VADDR;
+    } else if ((addr >= FCRAM_PADDR) && (addr < FCRAM_PADDR_END)) {
+        return addr - FCRAM_PADDR + FCRAM_VADDR;
+    }
+
+    ERROR_LOG(MEMMAP, "Unknown physical address @ 0x%08x", addr);
+    return addr;
+}
 
+/// Convert a virtual address to physical address
+u32 VirtualToPhysicalAddress(const u32 addr) {
+    // Inverse of PhysicalToVirtualAddress. Put any virtual address to physical address
+    // translations here. This is quite hacky, but necessary until we implement proper MMU
+    // emulation. Addresses outside VRAM/FCRAM are logged and returned unchanged.
+    // TODO: Replace this with real address translation once MMU emulation is implemented.
+    if ((addr >= VRAM_VADDR) && (addr < VRAM_VADDR_END)) {
+        return addr - VRAM_VADDR + VRAM_PADDR;
+    } else if ((addr >= FCRAM_VADDR) && (addr < FCRAM_VADDR_END)) {
+        return addr - FCRAM_VADDR + FCRAM_PADDR;
     }
+
+    ERROR_LOG(MEMMAP, "Unknown virtual address @ 0x%08x", addr);
     return addr;
 }
 
 template <typename T>
-inline void Read(T &var, const u32 addr) {
+inline void Read(T &var, const u32 vaddr) {
     // TODO: Figure out the fastest order of tests for both read and write (they are probably different).
     // TODO: Make sure this represents the mirrors in a correct way.
     // Could just do a base-relative read, too.... TODO
 
-    const u32 vaddr = _VirtualAddress(addr);
-
     // Kernel memory command buffer
     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
         var = *((const T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK]);
@@ -91,9 +98,8 @@ inline void Read(T &var, const u32 addr) {
 }
 
 template <typename T>
-inline void Write(u32 addr, const T data) {
-    u32 vaddr = _VirtualAddress(addr);
-    
+inline void Write(u32 vaddr, const T data) {
+
     // Kernel memory command buffer
     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
         *(T*)&g_kernel_mem[vaddr & KERNEL_MEMORY_MASK] = data;
@@ -133,16 +139,14 @@ inline void Write(u32 addr, const T data) {
     //    _assert_msg_(MEMMAP, false, "umimplemented write to Configuration Memory");
     //} else if ((vaddr & 0xFFFFF000) == 0x1FF81000) {
     //    _assert_msg_(MEMMAP, false, "umimplemented write to shared page");
-    
+
     // Error out...
     } else {
         ERROR_LOG(MEMMAP, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, vaddr);
     }
 }
 
-u8 *GetPointer(const u32 addr) {
-    const u32 vaddr = _VirtualAddress(addr);
-
+u8 *GetPointer(const u32 vaddr) {
     // Kernel memory command buffer
     if (vaddr >= KERNEL_MEMORY_VADDR && vaddr < KERNEL_MEMORY_VADDR_END) {
         return g_kernel_mem + (vaddr & KERNEL_MEMORY_MASK);
@@ -185,12 +189,12 @@ u8 *GetPointer(const u32 addr) {
  */
 u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
     MemoryBlock block;
-    
+
     block.base_address  = HEAP_VADDR;
     block.size          = size;
     block.operation     = operation;
     block.permissions   = permissions;
-    
+
     if (g_heap_map.size() > 0) {
         const MemoryBlock last_block = g_heap_map.rbegin()->second;
         block.address = last_block.address + last_block.size;
@@ -208,12 +212,12 @@ u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
  */
 u32 MapBlock_HeapGSP(u32 size, u32 operation, u32 permissions) {
     MemoryBlock block;
-    
+
     block.base_address  = HEAP_GSP_VADDR;
     block.size          = size;
     block.operation     = operation;
     block.permissions   = permissions;
-    
+
     if (g_heap_gsp_map.size() > 0) {
         const MemoryBlock last_block = g_heap_gsp_map.rbegin()->second;
         block.address = last_block.address + last_block.size;