12 files changed, 162 insertions, 51 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a46ed4bd7..68f91cc75 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -222,6 +222,18 @@ void Maxwell3D::DrawArrays() {
         debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
     }
 
+    // The two instance configuration flags cannot both be set at the same time.
+    ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
+               "Illegal combination of instancing parameters");
+
+    if (regs.draw.instance_next) {
+        // Increment the current instance *before* drawing.
+        state.current_instance += 1;
+    } else if (!regs.draw.instance_cont) {
+        // Reset the current instance to 0.
+        state.current_instance = 0;
+    }
+
     const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
     rasterizer.AccelerateDrawBatch(is_indexed);
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1b30ce018..771eb5abc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -638,6 +638,8 @@ public:
                     union {
                         u32 vertex_begin_gl;
                         BitField<0, 16, PrimitiveTopology> topology;
+                        BitField<26, 1, u32> instance_next;
+                        BitField<27, 1, u32> instance_cont;
                     };
                 } draw;
 
@@ -830,6 +832,7 @@ public:
         };
 
         std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+        u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
     };
 
     State state{};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index f438fa809..9413a81fb 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -12,6 +12,7 @@
 
 #include <boost/optional.hpp>
 
+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
 
@@ -79,6 +80,9 @@ union Attribute {
         // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
         // shader.
         TessCoordInstanceIDVertexID = 47,
+        // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
+        // shader. It is unknown what the other values contain.
+        FrontFacing = 63,
     };
 
     union {
@@ -141,6 +145,7 @@ enum class PredCondition : u64 {
     NotEqual = 5,
     GreaterEqual = 6,
     LessThanWithNan = 9,
+    GreaterThanWithNan = 12,
     NotEqualWithNan = 13,
     // TODO(Subv): Other condition types
 };
@@ -213,6 +218,11 @@ enum class FlowCondition : u64 {
     Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
 };
 
+enum class PredicateResultMode : u64 {
+    None = 0x0,    ///< Do not write a predicate.
+    NotZero = 0x3, ///< Set the predicate to true when the result is not zero.
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -253,7 +263,7 @@ union Instruction {
             BitField<39, 1, u64> invert_a;
             BitField<40, 1, u64> invert_b;
             BitField<41, 2, LogicOperation> operation;
-            BitField<44, 2, u64> unk44;
+            BitField<44, 2, PredicateResultMode> pred_result_mode;
             BitField<48, 3, Pred> pred48;
         } lop;
 
@@ -441,16 +451,20 @@ union Instruction {
         }
 
         bool IsComponentEnabled(size_t component) const {
-            static constexpr std::array<std::array<u32, 8>, 4> mask_lut{
-                {{},
-                 {0x1, 0x2, 0x4, 0x8, 0x3},
-                 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
-                 {0x7, 0xb, 0xd, 0xe, 0xf}}};
+            static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
+                {},
+                {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+                {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+                {0x7, 0xb, 0xd, 0xe, 0xf},
+            }};
 
             size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
             index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
 
-            return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0;
+            u32 mask = mask_lut[index][component_mask_selector];
+            // Zero-initialized LUT slots yield mask 0, i.e. a combination that is not implemented.
+            ASSERT(mask != 0);
+            return ((1ull << component) & mask) != 0;
         }
     } texs;
 
@@ -516,6 +530,8 @@ public:
         LD_A,
         LD_C,
         ST_A,
+        LDG, // Load from global memory
+        STG, // Store in global memory
         TEX,
         TEXQ, // Texture Query
         TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
@@ -727,6 +743,8 @@ private:
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
             INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
             INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
+            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
             INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
             INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9d1549fe9..93eadde7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -124,7 +124,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
         glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
                            vertex_array.stride);
 
-        ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
+        ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported");
     }
 
     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index b6947b97b..38aa067b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -142,14 +142,16 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI
 
+    // Depth formats
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
+     false}, // Z16
+
     // DepthStencil formats
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
      false}, // Z24S8
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
-     false},                                                                            // S8Z24
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
-     false}, // Z16
+     false}, // S8Z24
     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
      ComponentType::Float, false}, // Z32FS8
 }};
@@ -283,10 +285,10 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
         MortonCopy<true, PixelFormat::RG8S>,
         MortonCopy<true, PixelFormat::RG32UI>,
         MortonCopy<true, PixelFormat::R32UI>,
-        MortonCopy<true, PixelFormat::Z24S8>,
-        MortonCopy<true, PixelFormat::S8Z24>,
         MortonCopy<true, PixelFormat::Z32F>,
         MortonCopy<true, PixelFormat::Z16>,
+        MortonCopy<true, PixelFormat::Z24S8>,
+        MortonCopy<true, PixelFormat::S8Z24>,
         MortonCopy<true, PixelFormat::Z32FS8>,
         // clang-format on
 };
@@ -339,10 +341,10 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
         MortonCopy<false, PixelFormat::RG8S>,
         MortonCopy<false, PixelFormat::RG32UI>,
         MortonCopy<false, PixelFormat::R32UI>,
-        MortonCopy<false, PixelFormat::Z24S8>,
-        MortonCopy<false, PixelFormat::S8Z24>,
         MortonCopy<false, PixelFormat::Z32F>,
         MortonCopy<false, PixelFormat::Z16>,
+        MortonCopy<false, PixelFormat::Z24S8>,
+        MortonCopy<false, PixelFormat::S8Z24>,
         MortonCopy<false, PixelFormat::Z32FS8>,
         // clang-format on
 };
@@ -788,8 +790,6 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
     // Verify surface is compatible for blitting
     const auto& params{surface->GetSurfaceParams()};
     ASSERT(params.type == new_params.type);
-    ASSERT(params.pixel_format == new_params.pixel_format);
-    ASSERT(params.component_type == new_params.component_type);
 
     // Create a new surface with the new parameters, and blit the previous surface to it
     Surface new_surface{std::make_shared<CachedSurface>(new_params)};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 55cf3782c..beec01746 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -68,11 +68,15 @@ struct SurfaceParams {
 
         MaxColorFormat,
 
+        // Depth formats
+        Z32F = 42,
+        Z16 = 43,
+
+        MaxDepthFormat,
+
         // DepthStencil formats
-        Z24S8 = 42,
-        S8Z24 = 43,
-        Z32F = 44,
-        Z16 = 45,
+        Z24S8 = 44,
+        S8Z24 = 45,
         Z32FS8 = 46,
 
         MaxDepthStencilFormat,
@@ -153,10 +157,10 @@ struct SurfaceParams {
             1, // RG8S
             1, // RG32UI
             1, // R32UI
-            1, // Z24S8
-            1, // S8Z24
             1, // Z32F
             1, // Z16
+            1, // Z24S8
+            1, // S8Z24
             1, // Z32FS8
         }};
 
@@ -211,10 +215,10 @@ struct SurfaceParams {
             16,  // RG8S
             64,  // RG32UI
             32,  // R32UI
-            32,  // Z24S8
-            32,  // S8Z24
             32,  // Z32F
             16,  // Z16
+            32,  // Z24S8
+            32,  // S8Z24
             64,  // Z32FS8
         }};
 
@@ -587,6 +591,10 @@ struct SurfaceParams {
             return SurfaceType::ColorTexture;
         }
 
+        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+            return SurfaceType::Depth;
+        }
+
         if (static_cast<size_t>(pixel_format) <
             static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
             return SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e899237e5..57cf9f213 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -541,7 +541,11 @@ private:
             // vertex shader, and what's the value of the fourth element when inside a Tess Eval
             // shader.
             ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
-            return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
+            return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
+        case Attribute::Index::FrontFacing:
+            // TODO(Subv): Find out what the values are for the other elements.
+            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
+            return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -703,10 +707,11 @@ private:
                                        const std::string& op_a, const std::string& op_b) const {
         using Tegra::Shader::PredCondition;
         static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
-            {PredCondition::LessThan, "<"},        {PredCondition::Equal, "=="},
-            {PredCondition::LessEqual, "<="},      {PredCondition::GreaterThan, ">"},
-            {PredCondition::NotEqual, "!="},       {PredCondition::GreaterEqual, ">="},
-            {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
+            {PredCondition::LessThan, "<"},           {PredCondition::Equal, "=="},
+            {PredCondition::LessEqual, "<="},         {PredCondition::GreaterThan, ">"},
+            {PredCondition::NotEqual, "!="},          {PredCondition::GreaterEqual, ">="},
+            {PredCondition::LessThanWithNan, "<"},    {PredCondition::NotEqualWithNan, "!="},
+            {PredCondition::GreaterThanWithNan, ">"},
         };
 
         const auto& comparison{PredicateComparisonStrings.find(condition)};
@@ -715,7 +720,8 @@ private:
 
         std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
         if (condition == PredCondition::LessThanWithNan ||
-            condition == PredCondition::NotEqualWithNan) {
+            condition == PredCondition::NotEqualWithNan ||
+            condition == PredCondition::GreaterThanWithNan) {
             predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
         }
 
@@ -778,28 +784,51 @@ private:
     }
 
     void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
-                             const std::string& op_b) {
+                             const std::string& op_b,
+                             Tegra::Shader::PredicateResultMode predicate_mode,
+                             Tegra::Shader::Pred predicate) {
+        std::string result{};
         switch (logic_op) {
         case LogicOperation::And: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+            result = '(' + op_a + " & " + op_b + ')';
             break;
         }
         case LogicOperation::Or: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+            result = '(' + op_a + " | " + op_b + ')';
             break;
         }
         case LogicOperation::Xor: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+            result = '(' + op_a + " ^ " + op_b + ')';
             break;
         }
         case LogicOperation::PassB: {
-            regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+            result = op_b;
             break;
         }
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
             UNREACHABLE();
         }
+
+        if (dest != Tegra::Shader::Register::ZeroIndex) {
+            regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+        }
+
+        using Tegra::Shader::PredicateResultMode;
+        // Optionally derive a predicate from the result, as selected by the predicate mode.
+        switch (predicate_mode) {
+        case PredicateResultMode::None:
+            // This mode does not write any predicate.
+            return;
+        case PredicateResultMode::NotZero:
+            // Set the predicate to true if the result is not zero.
+            SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}",
+                         static_cast<u32>(predicate_mode));
+            UNREACHABLE();
+        }
     }
 
     void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
@@ -837,6 +866,33 @@ private:
         shader.AddLine('}');
     }
 
+    /**
+     * Emits GLSL that pushes a target address onto the SSY address stack and advances the top.
+     * @param target the address stored in the top slot, emitted as an unsigned literal.
+     */
+    void EmitPushToSSYStack(u32 target) {
+        shader.AddLine('{');
+        ++shader.scope;
+        shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;");
+        shader.AddLine("ssy_stack_top++;");
+        --shader.scope;
+        shader.AddLine('}');
+    }
+
+    /**
+     * Emits GLSL that decrements the SSY stack top, loads the popped address into jmp_to and
+     * then emits a break statement.
+     */
+    void EmitPopFromSSYStack() {
+        shader.AddLine('{');
+        ++shader.scope;
+        shader.AddLine("ssy_stack_top--;");
+        shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];");
+        shader.AddLine("break;");
+        --shader.scope;
+        shader.AddLine('}');
+    }
+
     /**
      * Compiles a single instruction from Tegra to GLSL.
      * @param offset the offset of the Tegra shader instruction.
@@ -1115,7 +1171,9 @@ private:
                 if (instr.alu.lop32i.invert_b)
                     op_b = "~(" + op_b + ')';
 
-                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+                                    Tegra::Shader::PredicateResultMode::None,
+                                    Tegra::Shader::Pred::UnusedIndex);
                 break;
             }
             default: {
@@ -1181,16 +1239,14 @@ private:
             case OpCode::Id::LOP_C:
             case OpCode::Id::LOP_R:
             case OpCode::Id::LOP_IMM: {
-                ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
-                ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
-
                 if (instr.alu.lop.invert_a)
                     op_a = "~(" + op_a + ')';
 
                 if (instr.alu.lop.invert_b)
                     op_b = "~(" + op_b + ')';
 
-                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
                 break;
             }
             case OpCode::Id::IMNMX_C:
@@ -1255,8 +1311,6 @@ private:
             break;
         }
         case OpCode::Type::Conversion: {
-            ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
-
             switch (opcode->GetId()) {
             case OpCode::Id::I2I_R: {
                 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
@@ -1859,13 +1913,13 @@ private:
                 ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
 
                 u32 target = offset + instr.bra.GetBranchTarget();
-                shader.AddLine("ssy_target = " + std::to_string(target) + "u;");
+                EmitPushToSSYStack(target);
                 break;
             }
             case OpCode::Id::SYNC: {
                 // The SYNC opcode jumps to the address previously set by the SSY opcode
                 ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
-                shader.AddLine("{ jmp_to = ssy_target; break; }");
+                EmitPopFromSSYStack();
                 break;
             }
             case OpCode::Id::DEPBAR: {
@@ -1936,7 +1990,13 @@ private:
             } else {
                 labels.insert(subroutine.begin);
                 shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
-                shader.AddLine("uint ssy_target = 0u;");
+
+                // TODO(Subv): Figure out the actual depth of the SSY stack, for now it seems
+                // unlikely that shaders will use 20 nested SSYs.
+                constexpr u32 SSY_STACK_SIZE = 20;
+                shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];");
+                shader.AddLine("uint ssy_stack_top = 0u;");
+
                 shader.AddLine("while (true) {");
                 ++shader.scope;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 129c777d1..57e0e1726 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -38,6 +38,7 @@ out vec4 position;
 
 layout (std140) uniform vs_config {
     vec4 viewport_flip;
+    uvec4 instance_id;
 };
 
 void main() {
@@ -90,6 +91,7 @@ out vec4 color;
 
 layout (std140) uniform fs_config {
     vec4 viewport_flip;
+    uvec4 instance_id;
 };
 
 void main() {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 415d42fda..f0886caac 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -37,11 +37,16 @@ void SetShaderUniformBlockBindings(GLuint shader) {
 } // namespace Impl
 
 void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+    const auto& state = gpu.state;
 
     // TODO(bunnei): Support more than one viewport
     viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
     viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
+
+    // Only the first component of the vector holds the instance; the rest is just padding.
+    instance_id[0] = state.current_instance;
 }
 
 } // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 716933a0b..75fa73605 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -24,14 +24,15 @@ void SetShaderUniformBlockBindings(GLuint shader);
 } // namespace Impl
 
 /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 //       Not following that rule will cause problems on some AMD drivers.
 struct MaxwellUniformData {
     void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
     alignas(16) GLvec4 viewport_flip;
+    alignas(16) GLuvec4 instance_id;
 };
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
               "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
 
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 8f719fdd8..5d91a0c2f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -147,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
         // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
         // manually mix them. However the shader part of this is not yet implemented.
         return GL_CLAMP_TO_BORDER;
+    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+        return GL_MIRROR_CLAMP_TO_EDGE;
     }
     LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
     UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 95f1aa0fe..bf30eda6d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -425,7 +425,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
 
     switch (severity) {
     case GL_DEBUG_SEVERITY_HIGH:
-        LOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message);
+        LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
         break;
     case GL_DEBUG_SEVERITY_MEDIUM:
         LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);