14 files changed, 165 insertions, 248 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5c0ae8009..a46ed4bd7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager&
     : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
 
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
-    auto macro_code = uploaded_macros.find(method);
+    // Reset the current macro.
+    executing_macro = 0;
+
     // The requested macro must have been uploaded already.
-    ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method);
+    auto macro_code = uploaded_macros.find(method);
+    if (macro_code == uploaded_macros.end()) {
+        LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
+        return;
+    }
 
-    // Reset the current macro and execute it.
-    executing_macro = 0;
+    // Execute the current macro.
     macro_interpreter.Execute(macro_code->second, std::move(parameters));
 }
 
@@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 
     auto& buffer = shader.const_buffers[bind_data.index];
 
+    ASSERT(bind_data.index < Regs::MaxConstBuffers);
+
     buffer.enabled = bind_data.valid.Value() != 0;
     buffer.index = bind_data.index;
     buffer.address = regs.const_buffer.BufferAddress();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4d0ff96a5..0506ac8fe 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -44,7 +44,7 @@ public:
         static constexpr size_t MaxShaderProgram = 6;
         static constexpr size_t MaxShaderStage = 5;
         // Maximum number of const buffers per shader stage.
-        static constexpr size_t MaxConstBuffers = 16;
+        static constexpr size_t MaxConstBuffers = 18;
 
         enum class QueryMode : u32 {
             Write = 0,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7e3fb4b1..3d4557b7e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,6 +78,8 @@ union Attribute {
         // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
         // shader.
         TessCoordInstanceIDVertexID = 47,
+        // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+        Unknown_63 = 63,
     };
 
     union {
@@ -254,20 +256,15 @@ union Instruction {
             BitField<56, 1, u64> invert_b;
         } lop32i;
 
-        float GetImm20_19() const {
-            float result{};
+        u32 GetImm20_19() const {
             u32 imm{static_cast<u32>(imm20_19)};
             imm <<= 12;
             imm |= negate_imm ? 0x80000000 : 0;
-            std::memcpy(&result, &imm, sizeof(imm));
-            return result;
+            return imm;
         }
 
-        float GetImm20_32() const {
-            float result{};
-            s32 imm{static_cast<s32>(imm20_32)};
-            std::memcpy(&result, &imm, sizeof(imm));
-            return result;
+        u32 GetImm20_32() const {
+            return static_cast<u32>(imm20_32);
         }
 
         s32 GetSignedImm20_20() const {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b2a83ce0b..4ff4d71c5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     case RenderTargetFormat::RGB10_A2_UNORM:
     case RenderTargetFormat::BGRA8_UNORM:
     case RenderTargetFormat::R32_FLOAT:
+    case RenderTargetFormat::R11G11B10_FLOAT:
         return 4;
     default:
         UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 440505c9d..874eddd78 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 {
     RG16_FLOAT = 0xDE,
     R11G11B10_FLOAT = 0xE0,
     R32_FLOAT = 0xE5,
+    B5G6R5_UNORM = 0xE8,
     R16_FLOAT = 0xF2,
     R8_UNORM = 0xF3,
 };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c2a931469..8360feb5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -161,7 +161,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
     // assume every shader uses them all.
     for (unsigned index = 0; index < 16; ++index) {
         auto& attrib = regs.vertex_attrib_format[index];
-        LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+        LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
                   index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
                   attrib.offset.Value(), attrib.IsNormalized());
 
@@ -324,11 +324,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
                                                                     bool using_depth_fb) {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
+    if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
+        LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
+        using_color_fb = false;
+    }
+
     // TODO(bunnei): Implement this
     const bool has_stencil = false;
 
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-
     const bool write_color_fb =
         state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
         state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
@@ -341,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
     Surface depth_surface;
     MathUtil::Rectangle<u32> surfaces_rect;
     std::tie(color_surface, depth_surface, surfaces_rect) =
-        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
+        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
 
-    MathUtil::Rectangle<u32> draw_rect{
+    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
+    const MathUtil::Rectangle<u32> draw_rect{
         static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
                                          surfaces_rect.left, surfaces_rect.right)), // Left
         static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
@@ -659,7 +663,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
         auto& buffer_draw_state =
             state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
 
-        ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer");
+        if (!buffer.enabled) {
+            continue;
+        }
+
         buffer_draw_state.enabled = true;
         buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
 
@@ -804,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() {
 void RasterizerOpenGL::SyncCullMode() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions
-    // state.cull.enabled = regs.cull.enabled != 0;
-    state.cull.enabled = false;
+    state.cull.enabled = regs.cull.enabled != 0;
 
     if (state.cull.enabled) {
         state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 257aa9571..9fb734b77 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -109,6 +109,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true},                                                                                 // DXT45
     {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
+    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true},                                                                     // DXN2UNORM
+    {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
     {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true},                                                                    // BC7U
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_4X4
@@ -180,36 +183,49 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
     return {0, actual_height, width, 0};
 }
 
+/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
+static bool IsFormatBCn(PixelFormat format) {
+    switch (format) {
+    case PixelFormat::DXT1:
+    case PixelFormat::DXT23:
+    case PixelFormat::DXT45:
+    case PixelFormat::DXN1:
+    case PixelFormat::DXN2SNORM:
+    case PixelFormat::DXN2UNORM:
+    case PixelFormat::BC7U:
+        return true;
+    }
+    return false;
+}
+
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer,
+                Tegra::GPUVAddr addr) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
     const auto& gpu = Core::System::GetInstance().GPU();
 
     if (morton_to_gl) {
-        if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) {
-            auto data = Tegra::Texture::UnswizzleTexture(
-                *gpu.memory_manager->GpuToCpuAddress(addr),
-                SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
-            std::memcpy(gl_buffer, data.data(), data.size());
-        } else {
-            auto data = Tegra::Texture::UnswizzleDepthTexture(
-                *gpu.memory_manager->GpuToCpuAddress(addr),
-                SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height);
-            std::memcpy(gl_buffer, data.data(), data.size());
-        }
+        // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
+        // pixel values.
+        const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
+        const std::vector<u8> data =
+            Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size,
+                                             bytes_per_pixel, stride, height, block_height);
+        const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
+        gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
     } else {
         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
         // check the configuration for this and perform more generic un/swizzle
         LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
         VideoCore::MortonCopyPixels128(
             stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-            Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer,
+            Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(),
             morton_to_gl);
     }
 }
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
         MortonCopy<true, PixelFormat::ABGR8>,        MortonCopy<true, PixelFormat::B5G6R5>,
@@ -218,6 +234,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
         MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,
         MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,
+        MortonCopy<true, PixelFormat::DXN2UNORM>,    MortonCopy<true, PixelFormat::DXN2SNORM>,
         MortonCopy<true, PixelFormat::BC7U>,         MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
         MortonCopy<true, PixelFormat::G8R8>,         MortonCopy<true, PixelFormat::BGRA8>,
         MortonCopy<true, PixelFormat::RGBA32F>,      MortonCopy<true, PixelFormat::RG32F>,
@@ -231,7 +248,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::Z32FS8>,
 };
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     gl_to_morton_fns = {
         MortonCopy<false, PixelFormat::ABGR8>,
@@ -242,7 +259,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<false, PixelFormat::RGBA16F>,
         MortonCopy<false, PixelFormat::R11FG11FB10F>,
         MortonCopy<false, PixelFormat::RGBA32UI>,
-        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported
+        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
+        // supported
+        nullptr,
+        nullptr,
         nullptr,
         nullptr,
         nullptr,
@@ -447,22 +467,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
 void CachedSurface::LoadGLBuffer() {
     ASSERT(params.type != SurfaceType::Fill);
 
-    u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
+    const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
 
     ASSERT(texture_src_data);
 
-    gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+    const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
+    const u32 copy_size = params.width * params.height * bytes_per_pixel;
 
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
 
-    if (!params.is_tiled) {
-        const u32 bytes_per_pixel{params.GetFormatBpp() >> 3};
+    if (params.is_tiled) {
+        gl_buffer.resize(copy_size);
 
-        std::memcpy(gl_buffer.data(), texture_src_data,
-                    bytes_per_pixel * params.width * params.height);
-    } else {
         morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
-            params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
+            params.width, params.block_height, params.height, gl_buffer, params.addr);
+    } else {
+        const u8* const texture_src_data_end = texture_src_data + copy_size;
+
+        gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
     ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
@@ -485,7 +507,7 @@ void CachedSurface::FlushGLBuffer() {
         std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
     } else {
         gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
-            params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
+            params.width, params.block_height, params.height, gl_buffer, params.addr);
     }
 }
 
@@ -600,8 +622,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
 
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
-    bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
+SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
+                                                                       bool using_depth_fb) {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
     // TODO(bunnei): This is hard corded to use just the first render buffer
@@ -757,10 +779,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*
 }
 
 void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
-    for (const auto& pair : surface_cache) {
-        const auto& surface{pair.second};
+    for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
+        const auto& surface{iter->second};
         const auto& params{surface->GetSurfaceParams()};
 
+        ++iter;
+
         if (params.IsOverlappingRegion(addr, size)) {
             UnregisterSurface(surface);
         }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0c6652c7a..829a76dfe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -35,31 +35,33 @@ struct SurfaceParams {
         DXT23 = 9,
         DXT45 = 10,
         DXN1 = 11, // This is also known as BC4
-        BC7U = 12,
-        ASTC_2D_4X4 = 13,
-        G8R8 = 14,
-        BGRA8 = 15,
-        RGBA32F = 16,
-        RG32F = 17,
-        R32F = 18,
-        R16F = 19,
-        R16UNORM = 20,
-        RG16 = 21,
-        RG16F = 22,
-        RG16UI = 23,
-        RG16I = 24,
-        RG16S = 25,
-        RGB32F = 26,
-        SRGBA8 = 27,
+        DXN2UNORM = 12,
+        DXN2SNORM = 13,
+        BC7U = 14,
+        ASTC_2D_4X4 = 15,
+        G8R8 = 16,
+        BGRA8 = 17,
+        RGBA32F = 18,
+        RG32F = 19,
+        R32F = 20,
+        R16F = 21,
+        R16UNORM = 22,
+        RG16 = 23,
+        RG16F = 24,
+        RG16UI = 25,
+        RG16I = 26,
+        RG16S = 27,
+        RGB32F = 28,
+        SRGBA8 = 29,
 
         MaxColorFormat,
 
         // DepthStencil formats
-        Z24S8 = 28,
-        S8Z24 = 29,
-        Z32F = 30,
-        Z16 = 31,
-        Z32FS8 = 32,
+        Z24S8 = 30,
+        S8Z24 = 31,
+        Z32F = 32,
+        Z16 = 33,
+        Z32FS8 = 34,
 
         MaxDepthStencilFormat,
 
@@ -109,6 +111,8 @@ struct SurfaceParams {
             4, // DXT23
             4, // DXT45
             4, // DXN1
+            4, // DXN2UNORM
+            4, // DXN2SNORM
             4, // BC7U
             4, // ASTC_2D_4X4
             1, // G8R8
@@ -153,6 +157,8 @@ struct SurfaceParams {
             128, // DXT23
             128, // DXT45
             64,  // DXN1
+            128, // DXN2UNORM
+            128, // DXN2SNORM
             128, // BC7U
             32,  // ASTC_2D_4X4
             16,  // G8R8
@@ -221,6 +227,8 @@ struct SurfaceParams {
             return PixelFormat::RG32F;
         case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
             return PixelFormat::R11FG11FB10F;
+        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+            return PixelFormat::B5G6R5;
         case Tegra::RenderTargetFormat::RGBA32_UINT:
             return PixelFormat::RGBA32UI;
         case Tegra::RenderTargetFormat::R8_UNORM:
@@ -303,6 +311,16 @@ struct SurfaceParams {
             return PixelFormat::DXT45;
         case Tegra::Texture::TextureFormat::DXN1:
             return PixelFormat::DXN1;
+        case Tegra::Texture::TextureFormat::DXN2:
+            switch (component_type) {
+            case Tegra::Texture::ComponentType::UNORM:
+                return PixelFormat::DXN2UNORM;
+            case Tegra::Texture::ComponentType::SNORM:
+                return PixelFormat::DXN2SNORM;
+            }
+            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+                         static_cast<u32>(component_type));
+            UNREACHABLE();
         case Tegra::Texture::TextureFormat::BC7U:
             return PixelFormat::BC7U;
         case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
@@ -330,89 +348,6 @@ struct SurfaceParams {
         }
     }
 
-    static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
-        // TODO(Subv): Properly implement this
-        switch (format) {
-        case PixelFormat::ABGR8:
-        case PixelFormat::SRGBA8:
-            return Tegra::Texture::TextureFormat::A8R8G8B8;
-        case PixelFormat::B5G6R5:
-            return Tegra::Texture::TextureFormat::B5G6R5;
-        case PixelFormat::A2B10G10R10:
-            return Tegra::Texture::TextureFormat::A2B10G10R10;
-        case PixelFormat::A1B5G5R5:
-            return Tegra::Texture::TextureFormat::A1B5G5R5;
-        case PixelFormat::R8:
-            return Tegra::Texture::TextureFormat::R8;
-        case PixelFormat::G8R8:
-            return Tegra::Texture::TextureFormat::G8R8;
-        case PixelFormat::RGBA16F:
-            return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
-        case PixelFormat::R11FG11FB10F:
-            return Tegra::Texture::TextureFormat::BF10GF11RF11;
-        case PixelFormat::RGBA32UI:
-            return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
-        case PixelFormat::DXT1:
-            return Tegra::Texture::TextureFormat::DXT1;
-        case PixelFormat::DXT23:
-            return Tegra::Texture::TextureFormat::DXT23;
-        case PixelFormat::DXT45:
-            return Tegra::Texture::TextureFormat::DXT45;
-        case PixelFormat::DXN1:
-            return Tegra::Texture::TextureFormat::DXN1;
-        case PixelFormat::BC7U:
-            return Tegra::Texture::TextureFormat::BC7U;
-        case PixelFormat::ASTC_2D_4X4:
-            return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
-        case PixelFormat::BGRA8:
-            // TODO(bunnei): This is fine for unswizzling (since we just need the right component
-            // sizes), but could be a bug if we used this function in different ways.
-            return Tegra::Texture::TextureFormat::A8R8G8B8;
-        case PixelFormat::RGBA32F:
-            return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
-        case PixelFormat::RGB32F:
-            return Tegra::Texture::TextureFormat::R32_G32_B32;
-        case PixelFormat::RG32F:
-            return Tegra::Texture::TextureFormat::R32_G32;
-        case PixelFormat::R32F:
-            return Tegra::Texture::TextureFormat::R32;
-        case PixelFormat::R16F:
-        case PixelFormat::R16UNORM:
-            return Tegra::Texture::TextureFormat::R16;
-        case PixelFormat::Z32F:
-            return Tegra::Texture::TextureFormat::ZF32;
-        case PixelFormat::Z24S8:
-            return Tegra::Texture::TextureFormat::Z24S8;
-        case PixelFormat::RG16F:
-        case PixelFormat::RG16:
-        case PixelFormat::RG16UI:
-        case PixelFormat::RG16I:
-        case PixelFormat::RG16S:
-            return Tegra::Texture::TextureFormat::R16_G16;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
-        switch (format) {
-        case PixelFormat::S8Z24:
-            return Tegra::DepthFormat::S8_Z24_UNORM;
-        case PixelFormat::Z24S8:
-            return Tegra::DepthFormat::Z24_S8_UNORM;
-        case PixelFormat::Z32F:
-            return Tegra::DepthFormat::Z32_FLOAT;
-        case PixelFormat::Z16:
-            return Tegra::DepthFormat::Z16_UNORM;
-        case PixelFormat::Z32FS8:
-            return Tegra::DepthFormat::Z32_S8_X24_FLOAT;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
     static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
         // TODO(Subv): Implement more component types
         switch (type) {
@@ -441,6 +376,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
         case Tegra::RenderTargetFormat::R8_UNORM:
         case Tegra::RenderTargetFormat::RG16_UNORM:
+        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
             return ComponentType::UNorm;
         case Tegra::RenderTargetFormat::RG16_SNORM:
             return ComponentType::SNorm;
@@ -612,8 +548,7 @@ public:
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
 
     /// Get the color and depth surfaces based on the framebuffer configuration
-    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
-                                                    const MathUtil::Rectangle<s32>& viewport);
+    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e3217db81..32f06f409 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -507,6 +507,8 @@ private:
 
     /// Build the GLSL register list.
     void BuildRegisterList() {
+        regs.reserve(Register::NumRegisters);
+
         for (size_t index = 0; index < Register::NumRegisters; ++index) {
             regs.emplace_back(index, suffix);
         }
@@ -523,6 +525,11 @@ private:
             // shader.
             ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
             return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
+        case Attribute::Index::Unknown_63:
+            // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+            LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63");
+            UNREACHABLE();
+            break;
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -534,6 +541,8 @@ private:
             LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
             UNREACHABLE();
         }
+
+        return "vec4(0, 0, 0, 0)";
     }
 
     /// Generates code representing an output attribute register.
@@ -602,12 +611,12 @@ private:
 
     /// Generates code representing a 19-bit immediate value
     static std::string GetImmediate19(const Instruction& instr) {
-        return std::to_string(instr.alu.GetImm20_19());
+        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
     }
 
     /// Generates code representing a 32-bit immediate value
     static std::string GetImmediate32(const Instruction& instr) {
-        return std::to_string(instr.alu.GetImm20_32());
+        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
     }
 
     /// Generates code representing a texture sampler.
@@ -650,16 +659,17 @@ private:
      * @param instr Instruction to generate the if condition for.
      * @returns string containing the predicate condition.
      */
-    std::string GetPredicateCondition(u64 index, bool negate) const {
+    std::string GetPredicateCondition(u64 index, bool negate) {
         using Tegra::Shader::Pred;
         std::string variable;
 
         // Index 7 is used as an 'Always True' condition.
-        if (index == static_cast<u64>(Pred::UnusedIndex))
+        if (index == static_cast<u64>(Pred::UnusedIndex)) {
             variable = "true";
-        else
+        } else {
             variable = 'p' + std::to_string(index) + '_' + suffix;
-
+            declr_predicates.insert(variable);
+        }
         if (negate) {
             return "!(" + variable + ')';
         }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24b1d956b..5c7b636e4 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -7,6 +7,10 @@
 #include <array>
 #include <glad/glad.h>
 
+#include "video_core/engines/maxwell_3d.h"
+
+using Regs = Tegra::Engines::Maxwell3D::Regs;
+
 namespace TextureUnits {
 
 struct TextureUnit {
@@ -120,7 +124,7 @@ public:
             GLuint bindpoint;
             GLuint ssbo;
         };
-        std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
+        std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
     } draw;
 
     struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 16b1bd606..c439446b1 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,9 +27,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
     case Maxwell::VertexAttribute::Type::UnsignedNorm: {
 
         switch (attrib.size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
         case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
             return GL_UNSIGNED_BYTE;
         case Maxwell::VertexAttribute::Size::Size_16_16:
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
             return GL_UNSIGNED_SHORT;
         case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
             return GL_UNSIGNED_INT_2_10_10_10_REV;
@@ -43,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
     case Maxwell::VertexAttribute::Type::SignedNorm: {
 
         switch (attrib.size) {
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return GL_INT;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
         case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
             return GL_BYTE;
         case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -84,6 +89,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
 
 inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
     switch (topology) {
+    case Maxwell::PrimitiveTopology::Points:
+        return GL_POINTS;
     case Maxwell::PrimitiveTopology::Triangles:
         return GL_TRIANGLES;
     case Maxwell::PrimitiveTopology::TriangleStrip:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bf9131193..899865e3b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
         break;
     case GL_DEBUG_SEVERITY_NOTIFICATION:
     case GL_DEBUG_SEVERITY_LOW:
-        LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+        LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message);
         break;
     }
 }
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 65db84ad3..70746a34e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) {
         return 8;
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
+    case TextureFormat::DXN2:
     case TextureFormat::BC7U:
         // In this case a 'pixel' actually refers to a 4x4 tile.
         return 16;
@@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) {
     }
 }
 
-static u32 DepthBytesPerPixel(DepthFormat format) {
-    switch (format) {
-    case DepthFormat::Z16_UNORM:
-        return 2;
-    case DepthFormat::S8_Z24_UNORM:
-    case DepthFormat::Z24_S8_UNORM:
-    case DepthFormat::Z32_FLOAT:
-        return 4;
-    case DepthFormat::Z32_S8_X24_FLOAT:
-        return 8;
-    default:
-        UNIMPLEMENTED_MSG("Format not implemented");
-        break;
-    }
-}
-
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
-                                 u32 block_height) {
-    u8* data = Memory::GetPointer(address);
-    u32 bytes_per_pixel = BytesPerPixel(format);
-
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
+                                 u32 height, u32 block_height) {
     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
-
-    switch (format) {
-    case TextureFormat::DXT1:
-    case TextureFormat::DXT23:
-    case TextureFormat::DXT45:
-    case TextureFormat::DXN1:
-    case TextureFormat::BC7U:
-        // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
-        // values.
-        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, block_height);
-        break;
-    case TextureFormat::A8R8G8B8:
-    case TextureFormat::A2B10G10R10:
-    case TextureFormat::A1B5G5R5:
-    case TextureFormat::B5G6R5:
-    case TextureFormat::R8:
-    case TextureFormat::G8R8:
-    case TextureFormat::R16_G16_B16_A16:
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R32_G32:
-    case TextureFormat::R32:
-    case TextureFormat::R16:
-    case TextureFormat::R16_G16:
-    case TextureFormat::BF10GF11RF11:
-    case TextureFormat::ASTC_2D_4X4:
-    case TextureFormat::R32_G32_B32:
-        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, block_height);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Format not implemented");
-        break;
-    }
-
-    return unswizzled_data;
-}
-
-std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
-                                      u32 block_height) {
-    u8* data = Memory::GetPointer(address);
-    u32 bytes_per_pixel = DepthBytesPerPixel(format);
-
-    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
-
-    switch (format) {
-    case DepthFormat::Z16_UNORM:
-    case DepthFormat::S8_Z24_UNORM:
-    case DepthFormat::Z24_S8_UNORM:
-    case DepthFormat::Z32_FLOAT:
-    case DepthFormat::Z32_S8_X24_FLOAT:
-        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, block_height);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Format not implemented");
-        break;
-    }
-
+    CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
+                     Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
     return unswizzled_data;
 }
 
@@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
     case TextureFormat::DXN1:
+    case TextureFormat::DXN2:
     case TextureFormat::BC7U:
     case TextureFormat::ASTC_2D_4X4:
     case TextureFormat::A8R8G8B8:
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 73a4924d1..1f7b731be 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -13,8 +13,8 @@ namespace Tegra::Texture {
 /**
  * Unswizzles a swizzled texture without changing its format.
  */
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
-                                 u32 block_height = TICEntry::DefaultBlockHeight);
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
+                                 u32 height, u32 block_height = TICEntry::DefaultBlockHeight);
 
 /**
  * Unswizzles a swizzled depth texture without changing its format.