23 files changed, 2640 insertions, 88 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 242a0d1cd..114bed20d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(video_core STATIC
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.h
+    texture_cache.cpp
+    texture_cache.h
     video_core.cpp
     video_core.h
 )
@@ -127,12 +129,14 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_sampler_cache.h
         renderer_vulkan/vk_scheduler.cpp
         renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_shader_decompiler.cpp
+        renderer_vulkan/vk_shader_decompiler.h
         renderer_vulkan/vk_stream_buffer.cpp
         renderer_vulkan/vk_stream_buffer.h
         renderer_vulkan/vk_swapchain.cpp
         renderer_vulkan/vk_swapchain.h)
 
-    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
+    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
 endif()
 
@@ -140,3 +144,6 @@ create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad)
+if (ENABLE_VULKAN)
+    target_link_libraries(video_core PRIVATE sirit)
+endif()
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8b1bea1ae..046d047cb 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -8,6 +8,7 @@
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7f613370b..2e1e96c81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1238,13 +1238,16 @@ union Instruction {
 
     union {
         BitField<20, 16, u64> imm20_16;
+        BitField<35, 1, u64> high_b_rr; // used on RR
         BitField<36, 1, u64> product_shift_left;
         BitField<37, 1, u64> merge_37;
         BitField<48, 1, u64> sign_a;
         BitField<49, 1, u64> sign_b;
+        BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
         BitField<50, 3, XmadMode> mode;
         BitField<52, 1, u64> high_b;
         BitField<53, 1, u64> high_a;
+        BitField<55, 1, u64> product_shift_left_second; // used on CR
         BitField<56, 1, u64> merge_56;
     } xmad;
 
@@ -1662,7 +1665,7 @@ private:
             INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
             INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
             INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
-            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
             INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
             INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
             INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 7989ec11b..25652e794 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,6 +7,7 @@
 
 #include "common/alignment.h"
 #include "core/core.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 5842d6213..8d9ee81f1 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -6,6 +6,7 @@
 
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7ff1e6737..d250d5cbb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -299,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
+    // Prepare packed bindings
+    bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+    bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
+
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -321,8 +325,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Bind the emulation info buffer
-        glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+                                 static_cast<GLsizeiptr>(sizeof(ubo)));
 
         Shader shader{shader_cache.GetStageProgram(program)};
         const auto [program_handle, next_bindings] =
@@ -366,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         base_bindings = next_bindings;
     }
 
+    bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
+
     SyncClipEnabled(clip_distances);
 
     gpu.dirty_flags.shaders = false;
@@ -900,23 +907,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& entries = shader->GetShaderEntries().const_buffers;
 
-    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
-    std::array<GLuint, max_binds> bind_buffers;
-    std::array<GLintptr, max_binds> bind_offsets;
-    std::array<GLsizeiptr, max_binds> bind_sizes;
-
-    ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
-
     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& used_buffer = entries[bindpoint];
         const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
 
         if (!buffer.enabled) {
-            // With disabled buffers set values as zero to unbind them
-            bind_buffers[bindpoint] = 0;
-            bind_offsets[bindpoint] = 0;
-            bind_sizes[bindpoint] = 0;
+            // Set values to zero to unbind buffers
+            bind_ubo_pushbuffer.Push(0, 0, 0);
             continue;
         }
 
@@ -944,30 +942,19 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
         const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
             buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
 
-        // Prepare values for multibind
-        bind_buffers[bindpoint] = buffer_cache.GetHandle();
-        bind_offsets[bindpoint] = const_buffer_offset;
-        bind_sizes[bindpoint] = size;
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
     }
-
-    // The first binding is reserved for emulation values
-    const GLuint ubo_base_binding = base_bindings.cbuf + 1;
-    glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
-                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
 }
 
 void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                           const Shader& shader, GLenum primitive_mode,
                                           BaseBindings base_bindings) {
-    // TODO(Rodrigo): Use ARB_multi_bind here
     const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
-    for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = base_bindings.gmem + bindpoint;
-        const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
-        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+    for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+        const auto& entry{entries[bindpoint]};
+        const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+                                  static_cast<GLsizeiptr>(region->GetSizeInBytes()));
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 54fbf48aa..e4c64ae71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,6 +28,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"
 
 namespace Core {
 class System;
@@ -229,6 +230,9 @@ private:
     PrimitiveAssembler primitive_assembler{buffer_cache};
     GLint uniform_buffer_alignment;
 
+    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+
     std::size_t CalculateVertexArraysSize() const;
 
     std::size_t CalculateIndexBufferSize() const;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e2ec72b4e..f2ffc4710 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -280,6 +281,10 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
     params.width = config.width;
+    if (!params.is_tiled) {
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.pitch = config.width * bpp;
+    }
     params.height = config.height;
     params.unaligned_height = config.height;
     params.target = SurfaceTarget::Texture2D;
@@ -676,8 +681,8 @@ void CachedSurface::FlushGLBuffer() {
     gl_buffer[0].resize(GetSizeInBytes());
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+    glPixelStorei(GL_PACK_ALIGNMENT, align);
     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
     ASSERT(!tuple.compressed);
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -722,8 +727,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
 
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
 
     const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -1189,10 +1194,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
         return new_surface;
     }
 
+    const bool old_compressed =
+        GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
+    const bool new_compressed =
+        GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
+    const bool compatible_formats =
+        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
+        !(old_compressed || new_compressed);
     // For compatible surfaces, we can just do fast glCopyImageSubData based copy
-    if (old_params.target == new_params.target && old_params.type == new_params.type &&
-        old_params.depth == new_params.depth && old_params.depth == 1 &&
-        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
+    if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
+        old_params.depth == 1 && compatible_formats) {
         FastCopySurface(old_surface, new_surface);
         return new_surface;
     }
@@ -1207,7 +1218,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
     case SurfaceTarget::TextureCubemap:
     case SurfaceTarget::Texture2DArray:
     case SurfaceTarget::TextureCubeArray:
-        if (old_params.pixel_format == new_params.pixel_format)
+        if (compatible_formats)
             FastLayeredCopySurface(old_surface, new_surface);
         else {
             AccurateCopySurface(old_surface, new_surface);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index ad4fd3ad2..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -11,6 +11,7 @@
 #include <vector>
 
 #include "common/alignment.h"
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -205,6 +206,13 @@ struct SurfaceParams {
         return bd;
     }
 
+    u32 RowAlign(u32 mip_level) const { // Row alignment derived from this mip's row size
+        const u32 m_width = MipWidth(mip_level);
+        const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
+        const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
+        return (1U << l2); // Power of two given by the trailing zero count of width * bpp
+    }
+
     /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                           const GLShader::SamplerEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ab381932c..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -7,6 +7,7 @@
 #include "common/hash.h"
 #include "core/core.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3ea08ef7b..28e490b3c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -552,8 +552,7 @@ private:
             } else if (std::holds_alternative<OperationNode>(*offset)) {
                 // Indirect access
                 const std::string final_offset = code.GenerateTemporary();
-                code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
-                             std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
+                code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
                 return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
                                    final_offset, final_offset);
 
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index eaf3e03a0..05ab01dcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,12 +2,44 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 
 namespace OpenGL::GLShader {
 
 using Tegra::Engines::Maxwell3D;
 
+ProgramManager::ProgramManager() {
+    pipeline.Create();
+}
+
+ProgramManager::~ProgramManager() = default;
+
+void ProgramManager::ApplyTo(OpenGLState& state) { // Syncs the pipeline, then selects it
+    UpdatePipeline();
+    state.draw.shader_program = 0; // No single program; the pipeline handle is used instead
+    state.draw.program_pipeline = pipeline.handle;
+}
+
+void ProgramManager::UpdatePipeline() {
+    // Avoid updating the pipeline when the values have not changed
+    if (old_state == current_state) {
+        return;
+    }
+
+    // Workaround for AMD bug: unbind every used stage before attaching the new programs
+    constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+                                     GL_FRAGMENT_SHADER_BIT};
+    glUseProgramStages(pipeline.handle, all_used_stages, 0);
+
+    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+    glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+
+    old_state = current_state; // Remember what was applied for the next comparison
+}
+
 void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
     const auto& regs = maxwell.regs;
     const auto& state = maxwell.state;
@@ -16,7 +48,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
     viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
     viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
 
-    u32 func = static_cast<u32>(regs.alpha_test_func);
+    auto func{static_cast<u32>(regs.alpha_test_func)};
     // Normalize the gl variants of opCompare to be the same as the normal variants
     const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
     if (func >= op_gl_variant_base) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 37dcfefdb..cec18a832 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include <glad/glad.h>
 
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,55 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
 
 class ProgramManager {
 public:
-    ProgramManager() {
-        pipeline.Create();
-    }
+    explicit ProgramManager();
+    ~ProgramManager();
+
+    void ApplyTo(OpenGLState& state);
 
     void UseProgrammableVertexShader(GLuint program) {
-        vs = program;
+        current_state.vertex_shader = program;
     }
 
     void UseProgrammableGeometryShader(GLuint program) {
-        gs = program;
+        current_state.geometry_shader = program;
     }
 
     void UseProgrammableFragmentShader(GLuint program) {
-        fs = program;
+        current_state.fragment_shader = program;
     }
 
     void UseTrivialGeometryShader() {
-        gs = 0;
-    }
-
-    void ApplyTo(OpenGLState& state) {
-        UpdatePipeline();
-        state.draw.shader_program = 0;
-        state.draw.program_pipeline = pipeline.handle;
+        current_state.geometry_shader = 0;
     }
 
 private:
-    void UpdatePipeline() {
-        // Avoid updating the pipeline when values have no changed
-        if (old_vs == vs && old_fs == fs && old_gs == gs)
-            return;
-        // Workaround for AMD bug
-        glUseProgramStages(pipeline.handle,
-                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
-                           0);
-
-        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
-        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
-        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
-
-        // Update the old values
-        old_vs = vs;
-        old_fs = fs;
-        old_gs = gs;
-    }
+    struct PipelineState {
+        bool operator==(const PipelineState& rhs) const {
+            return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
+                   geometry_shader == rhs.geometry_shader;
+        }
+
+        bool operator!=(const PipelineState& rhs) const {
+            return !operator==(rhs);
+        }
+
+        GLuint vertex_shader{};
+        GLuint fragment_shader{};
+        GLuint geometry_shader{};
+    };
+
+    void UpdatePipeline();
 
     OGLPipeline pipeline;
-    GLuint vs{}, fs{}, gs{};
-    GLuint old_vs{}, old_fs{}, old_gs{};
+    PipelineState current_state;
+    PipelineState old_state;
 };
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index d84634cb3..84a987371 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
 #include <string>
 #include <fmt/format.h>
 #include <glad/glad.h>
+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) { // Starts a new batch at first_
+    first = first_;
+    buffers.clear(); // Drop every range queued for the previous batch
+    offsets.clear();
+    sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+    buffers.push_back(buffer); // The three vectors grow in lockstep, one entry per range
+    offsets.push_back(offset);
+    sizes.push_back(size);
+}
+
+void BindBuffersRangePushBuffer::Bind() const { // Issues the queued ranges in one GL call
+    const std::size_t count{buffers.size()};
+    DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+    if (count == 0) { // Nothing was pushed, so the GL call is skipped
+        return;
+    }
+    glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+                       sizes.data());
+}
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
     if (!GLAD_GL_KHR_debug) {
         return; // We don't need to throw an error as this is just for debugging
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 1fcb6fc11..aef45c9dc 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
 #pragma once
 
 #include <string>
+#include <vector>
 #include <glad/glad.h>
 #include "common/common_types.h"
 
 namespace OpenGL {
 
+class BindBuffersRangePushBuffer { // Batches buffer ranges for a single glBindBuffersRange call
+public:
+    explicit BindBuffersRangePushBuffer(GLenum target);
+    ~BindBuffersRangePushBuffer();
+    /// Clears the queued ranges and sets the first binding index of the next batch
+    void Setup(GLuint first_);
+    /// Appends one buffer range to the batch
+    void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+    /// Binds every queued range starting at the first binding index; no-op when empty
+    void Bind() const;
+
+private:
+    GLenum target;
+    GLuint first{}; // Value-initialized so it is never read while indeterminate
+    std::vector<GLuint> buffers;
+    std::vector<GLintptr> offsets;
+    std::vector<GLsizeiptr> sizes;
+};
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
 
 } // namespace OpenGL
 \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
new file mode 100644
index 000000000..e0a6f5e87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -0,0 +1,1379 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <functional>
+#include <map>
+#include <set>
+
+#include <fmt/format.h>
+
+#include <sirit/sirit.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace Vulkan::VKShader {
+
+using Sirit::Id;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
+using Operation = const OperationNode&;
+
+// TODO(Rodrigo): Use rasterizer's value
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000;
+constexpr u32 STAGE_BINDING_STRIDE = 0x100;
+
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct SamplerImage {
+    Id image_type;
+    Id sampled_image_type;
+    Id sampler;
+};
+
+namespace {
+
+spv::Dim GetSamplerDim(const Sampler& sampler) { // Maps the sampler's texture type to spv::Dim
+    switch (sampler.GetType()) {
+    case Tegra::Shader::TextureType::Texture1D:
+        return spv::Dim::Dim1D;
+    case Tegra::Shader::TextureType::Texture2D:
+        return spv::Dim::Dim2D;
+    case Tegra::Shader::TextureType::Texture3D:
+        return spv::Dim::Dim3D;
+    case Tegra::Shader::TextureType::TextureCube:
+        return spv::Dim::Cube;
+    default: // Any other type is reported as unimplemented and treated as 2D
+        UNIMPLEMENTED_MSG("Unimplemented sampler type={}", static_cast<u32>(sampler.GetType()));
+        return spv::Dim::Dim2D;
+    }
+}
+
+/// Returns true if an attribute index is one of the 32 generic attributes
+constexpr bool IsGenericAttribute(Attribute::Index attribute) {
+    return attribute >= Attribute::Index::Attribute_0 &&
+           attribute <= Attribute::Index::Attribute_31;
+}
+
+/// Returns the location of a generic attribute: its index relative to Attribute_0
+constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) {
+    ASSERT(IsGenericAttribute(attribute)); // Other indices would make the subtraction meaningless
+    return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
+/// Returns true if the operation carries arithmetic metadata that is flagged as precise
+bool IsPrecise(Operation operand) {
+    const auto& meta = operand.GetMeta();
+
+    if (std::holds_alternative<MetaArithmetic>(meta)) {
+        return std::get<MetaArithmetic>(meta).precise;
+    }
+    if (std::holds_alternative<MetaHalfArithmetic>(meta)) {
+        return std::get<MetaHalfArithmetic>(meta).precise;
+    }
+    return false; // Any other kind of metadata is never treated as precise
+}
+
+} // namespace
+
+class SPIRVDecompiler : public Sirit::Module {
+public:
+    explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
+        : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} {
+        AddCapability(spv::Capability::Shader);
+        AddExtension("SPV_KHR_storage_buffer_storage_class");
+        AddExtension("SPV_KHR_variable_pointers");
+    }
+
+    void Decompile() { // Emits declarations, then one function that dispatches basic blocks
+        AllocateBindings();
+        AllocateLabels();
+
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareFragment();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalBuffers();
+        DeclareSamplers();
+        // Open the entry function; its blocks are reached through a loop wrapping a switch
+        execute_function =
+            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
+        Emit(OpLabel());
+        // NOTE(review): begin() is dereferenced unchecked; assumes at least one basic block
+        const u32 first_address = ir.GetBasicBlocks().begin()->first;
+        const Id loop_label = OpLabel("loop");
+        const Id merge_label = OpLabel("merge");
+        const Id dummy_label = OpLabel();
+        const Id jump_label = OpLabel();
+        continue_label = OpLabel("continue");
+        // Gather the switch cases: one (address literal, label) pair per allocated label
+        std::vector<Sirit::Literal> literals;
+        std::vector<Id> branch_labels;
+        for (const auto& pair : labels) {
+            const auto [literal, label] = pair;
+            literals.push_back(literal);
+            branch_labels.push_back(label);
+        }
+
+        // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
+        // that shaders will use 20 nested SSYs and PBKs.
+        constexpr u32 FLOW_STACK_SIZE = 20;
+        const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
+        jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
+                                 spv::StorageClass::Function, Constant(t_uint, first_address)));
+        flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type),
+                                     spv::StorageClass::Function, ConstantNull(flow_stack_type)));
+        flow_stack_top =
+            Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));
+
+        Name(jmp_to, "jmp_to");
+        Name(flow_stack, "flow_stack");
+        Name(flow_stack_top, "flow_stack_top");
+        // Loop header: every iteration switches on the address currently stored in jmp_to
+        Emit(OpBranch(loop_label));
+        Emit(loop_label);
+        Emit(OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::Unroll));
+        Emit(OpBranch(dummy_label));
+
+        Emit(dummy_label);
+        const Id default_branch = OpLabel();
+        const Id jmp_to_load = Emit(OpLoad(t_uint, jmp_to));
+        Emit(OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone));
+        Emit(OpSwitch(jmp_to_load, default_branch, literals, branch_labels));
+        // Default case (no label matches jmp_to): return from the function
+        Emit(default_branch);
+        Emit(OpReturn());
+        // Emit each block; its end branches to the next label by address, else to the default
+        for (const auto& pair : ir.GetBasicBlocks()) {
+            const auto& [address, bb] = pair;
+            Emit(labels.at(address));
+
+            VisitBasicBlock(bb);
+
+            const auto next_it = labels.lower_bound(address + 1);
+            const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
+            Emit(OpBranch(next_label));
+        }
+        // Switch merge block, then the loop back-edge, then the loop merge block
+        Emit(jump_label);
+        Emit(OpBranch(continue_label));
+        Emit(continue_label);
+        Emit(OpBranch(loop_label));
+        Emit(merge_label);
+        Emit(OpReturn());
+        Emit(OpFunctionEnd());
+    }
+
+    ShaderEntries GetShaderEntries() const { // Summarizes bindings and resources used by the IR
+        ShaderEntries entries;
+        entries.const_buffers_base_binding = const_buffers_base_binding;
+        entries.global_buffers_base_binding = global_buffers_base_binding;
+        entries.samplers_base_binding = samplers_base_binding;
+        for (const auto& cbuf : ir.GetConstantBuffers()) {
+            entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
+        }
+        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
+            entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+        }
+        for (const auto& sampler : ir.GetSamplers()) {
+            entries.samplers.emplace_back(sampler);
+        }
+        for (const auto& attr : ir.GetInputAttributes())
+            if (IsGenericAttribute(attr.first)) // Only generic attributes map to a location
+                entries.attributes.insert(GetGenericAttributeLocation(attr.first));
+        entries.clip_distances = ir.GetClipDistances();
+        entries.shader_length = ir.GetLength();
+        entries.entry_function = execute_function;
+        entries.interfaces = interfaces;
+        return entries;
+    }
+
+private:
+    /// Pointer to a SPIRVDecompiler member function that takes an Operation and returns an Id.
+    using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
+    /// Array type with one OperationDecompilerFn slot per OperationCode value.
+    using OperationDecompilersArray =
+        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
+
+    /// Number of internal flags, taken from InternalFlag::Amount.
+    static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
+    // NOTE(review): presumably the size in bytes of one constant buffer element (a float4) - confirm
+    static constexpr u32 CBUF_STRIDE = 16;
+
+    /// Assigns the base binding of each resource category (constant buffers, global buffers and
+    /// samplers, in that order) starting at the first binding of the current stage.
+    void AllocateBindings() {
+        const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
+        u32 binding_iterator = binding_base;
+
+        // Hands out `amount` consecutive bindings and returns the first one of them
+        const auto Reserve = [&binding_iterator](std::size_t amount) {
+            const u32 first = binding_iterator;
+            binding_iterator = first + static_cast<u32>(amount);
+            return first;
+        };
+
+        const std::size_t num_const_buffers = ir.GetConstantBuffers().size();
+        const std::size_t num_global_buffers = ir.GetGlobalMemoryBases().size();
+        const std::size_t num_samplers = ir.GetSamplers().size();
+
+        const_buffers_base_binding = Reserve(num_const_buffers);
+        global_buffers_base_binding = Reserve(num_global_buffers);
+        samplers_base_binding = Reserve(num_samplers);
+
+        ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE,
+                   "Stage binding stride is too small");
+    }
+
+    /// Creates a named label for every basic block of the IR, keyed by the block address.
+    void AllocateLabels() {
+        for (const auto& block : ir.GetBasicBlocks()) {
+            const u32 address = block.first;
+            const Id label = OpLabel(fmt::format("label_0x{:x}", address));
+            labels.emplace(address, label);
+        }
+    }
+
+    /// Declares the vertex stage built-ins. Does nothing on other stages.
+    void DeclareVertex() {
+        if (stage == ShaderStage::Vertex) {
+            DeclareVertexRedeclarations();
+        }
+    }
+
+    /// Geometry stage declarations are not implemented yet. Does nothing on other stages.
+    void DeclareGeometry() {
+        if (stage == ShaderStage::Geometry) {
+            UNIMPLEMENTED();
+        }
+    }
+
+    /// Declares the fragment stage interface: one color output per used render target, the depth
+    /// output when the header enables it, and the FragCoord and FrontFacing built-in inputs.
+    /// Does nothing on other stages.
+    void DeclareFragment() {
+        if (stage != ShaderStage::Fragment) {
+            return;
+        }
+
+        const auto num_render_targets = static_cast<u32>(frag_colors.size());
+        for (u32 rt = 0; rt < num_render_targets; ++rt) {
+            if (IsRenderTargetUsed(rt)) {
+                const Id color =
+                    AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
+                Name(color, fmt::format("frag_color{}", rt));
+                Decorate(color, spv::Decoration::Location, rt);
+
+                frag_colors[rt] = color;
+                interfaces.push_back(color);
+            }
+        }
+
+        if (header.ps.omap.depth) {
+            const auto frag_depth_builtin = static_cast<u32>(spv::BuiltIn::FragDepth);
+            frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
+            Name(frag_depth, "frag_depth");
+            Decorate(frag_depth, spv::Decoration::BuiltIn, frag_depth_builtin);
+
+            interfaces.push_back(frag_depth);
+        }
+
+        // Built-in inputs
+        frag_coord = DeclareBuiltIn(spv::BuiltIn::FragCoord, spv::StorageClass::Input, t_in_float4,
+                                    "frag_coord");
+        front_facing = DeclareBuiltIn(spv::BuiltIn::FrontFacing, spv::StorageClass::Input,
+                                      t_in_bool, "front_facing");
+    }
+
+    /// Declares a private float variable, initialized with v_float_zero, for every register
+    /// reported by the IR and stores it in the registers map.
+    void DeclareRegisters() {
+        for (const u32 gpr : ir.GetRegisters()) {
+            const Id variable = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+            Name(variable, fmt::format("gpr_{}", gpr));
+
+            const Id global = AddGlobalVariable(variable);
+            registers.emplace(gpr, global);
+        }
+    }
+
+    /// Declares a private boolean variable, initialized with v_false, for every predicate reported
+    /// by the IR and stores it in the predicates map.
+    void DeclarePredicates() {
+        for (const auto pred : ir.GetPredicates()) {
+            const Id variable = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            Name(variable, fmt::format("pred_{}", static_cast<u32>(pred)));
+
+            const Id global = AddGlobalVariable(variable);
+            predicates.emplace(pred, global);
+        }
+    }
+
+    /// Declares the private float array backing local memory. Nothing is declared when the
+    /// header reports a local memory size of zero.
+    void DeclareLocalMemory() {
+        const u64 local_memory_size = header.GetLocalMemorySize();
+        if (local_memory_size == 0) {
+            return;
+        }
+
+        // Number of floats: the size aligned with Common::AlignUp(size, 4), divided by 4
+        const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4);
+        const Id array_type = TypeArray(t_float, Constant(t_uint, element_count));
+        const Id pointer_type = TypePointer(spv::StorageClass::Private, array_type);
+        Name(pointer_type, "LocalMemory");
+
+        const Id null_initializer = ConstantNull(array_type);
+        local_memory = OpVariable(pointer_type, spv::StorageClass::Private, null_initializer);
+        AddGlobalVariable(Name(local_memory, "local_memory"));
+    }
+
+    /// Declares one private boolean variable, initialized with v_false, per internal flag and
+    /// stores it in internal_flags at the index of the flag.
+    void DeclareInternalFlags() {
+        constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
+                                                                         "overflow"};
+        for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
+            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
+        }
+    }
+
+    /// Declares an input variable for every generic attribute read by the shader, registering it
+    /// in input_attributes and in the interface list. Non-generic attributes are skipped.
+    /// On the fragment stage the variable is also decorated according to the attribute use
+    /// reported by the shader header.
+    void DeclareInputAttributes() {
+        // Iterate by reference so container entries are not copied on every iteration
+        for (const auto& element : ir.GetInputAttributes()) {
+            const Attribute::Index index = element.first;
+            if (!IsGenericAttribute(index)) {
+                continue;
+            }
+
+            UNIMPLEMENTED_IF(stage == ShaderStage::Geometry);
+
+            const u32 location = GetGenericAttributeLocation(index);
+            const Id id = OpVariable(t_in_float4, spv::StorageClass::Input);
+            Name(AddGlobalVariable(id), fmt::format("in_attr{}", location));
+            input_attributes.emplace(index, id);
+            interfaces.push_back(id);
+
+            Decorate(id, spv::Decoration::Location, location);
+
+            // Interpolation decorations are only emitted for fragment shader inputs
+            if (stage != ShaderStage::Fragment) {
+                continue;
+            }
+            switch (header.ps.GetAttributeUse(location)) {
+            case AttributeUse::Constant:
+                Decorate(id, spv::Decoration::Flat);
+                break;
+            case AttributeUse::ScreenLinear:
+                Decorate(id, spv::Decoration::NoPerspective);
+                break;
+            case AttributeUse::Perspective:
+                // Default
+                break;
+            default:
+                UNREACHABLE_MSG("Unused attribute being fetched");
+            }
+        }
+    }
+
+    /// Declares an output variable for every generic attribute written by the shader, registering
+    /// it in output_attributes and in the interface list. Non-generic attributes are skipped.
+    void DeclareOutputAttributes() {
+        for (const auto index : ir.GetOutputAttributes()) {
+            if (IsGenericAttribute(index)) {
+                const auto location = GetGenericAttributeLocation(index);
+                const Id variable = OpVariable(t_out_float4, spv::StorageClass::Output);
+
+                const Id global = AddGlobalVariable(variable);
+                Name(global, fmt::format("out_attr{}", location));
+
+                output_attributes.emplace(index, variable);
+                interfaces.push_back(variable);
+
+                Decorate(variable, spv::Decoration::Location, location);
+            }
+        }
+    }
+
+    /// Declares a uniform variable for every constant buffer used by the shader. Bindings are
+    /// assigned consecutively starting at const_buffers_base_binding.
+    void DeclareConstantBuffers() {
+        u32 binding = const_buffers_base_binding;
+        for (const auto& entry : ir.GetConstantBuffers()) {
+            const auto index = entry.first;
+            const Id buffer = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
+            AddGlobalVariable(Name(buffer, fmt::format("cbuf_{}", index)));
+
+            Decorate(buffer, spv::Decoration::Binding, binding);
+            Decorate(buffer, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            constant_buffers.emplace(index, buffer);
+
+            ++binding;
+        }
+    }
+
+    /// Declares a storage buffer variable for every global memory base used by the shader.
+    /// Bindings are assigned consecutively starting at global_buffers_base_binding.
+    void DeclareGlobalBuffers() {
+        u32 binding = global_buffers_base_binding;
+        for (const auto& base : ir.GetGlobalMemoryBases()) {
+            const Id buffer = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
+            const auto name = fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset);
+            AddGlobalVariable(Name(buffer, name));
+
+            Decorate(buffer, spv::Decoration::Binding, binding);
+            Decorate(buffer, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            global_buffers.emplace(base, buffer);
+
+            ++binding;
+        }
+    }
+
+    /// Declares a sampled image variable for every sampler used by the shader and records its
+    /// image type, sampled image type and variable in sampler_images, keyed by sampler index.
+    /// Bindings are assigned consecutively starting at samplers_base_binding.
+    void DeclareSamplers() {
+        // TODO(Rodrigo): Sampled 1 indicates that the image will be used with a sampler. When
+        // SULD and SUST instructions are implemented, replace this value.
+        constexpr int sampled = 1;
+
+        u32 binding = samplers_base_binding;
+        for (const auto& sampler : ir.GetSamplers()) {
+            const auto dim = GetSamplerDim(sampler);
+            const int depth = sampler.IsShadow() ? 1 : 0;
+            const int arrayed = sampler.IsArray() ? 1 : 0;
+
+            const Id image_type =
+                TypeImage(t_float, dim, depth, arrayed, false, sampled, spv::ImageFormat::Unknown);
+            const Id sampled_image_type = TypeSampledImage(image_type);
+            const Id pointer_type =
+                TypePointer(spv::StorageClass::UniformConstant, sampled_image_type);
+
+            const Id variable = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+            AddGlobalVariable(Name(variable, fmt::format("sampler_{}", sampler.GetIndex())));
+
+            const auto key = static_cast<u32>(sampler.GetIndex());
+            sampler_images.insert({key, {image_type, sampled_image_type, variable}});
+
+            Decorate(variable, spv::Decoration::Binding, binding);
+            Decorate(variable, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+            ++binding;
+        }
+    }
+
+    /// Declares the vertex stage built-ins: the VertexIndex and InstanceIndex inputs and the
+    /// "PerVertex" output block. The block always holds the position; point size and clip
+    /// distances are only added when they appear in the output attributes of the IR.
+    void DeclareVertexRedeclarations() {
+        vertex_index = DeclareBuiltIn(spv::BuiltIn::VertexIndex, spv::StorageClass::Input,
+                                      t_in_uint, "vertex_index");
+        instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
+                                        t_in_uint, "instance_index");
+
+        // Find out which optional members are written
+        bool has_point_size = false;
+        bool has_clip_distances = false;
+        for (const auto index : ir.GetOutputAttributes()) {
+            switch (index) {
+            case Attribute::Index::PointSize:
+                has_point_size = true;
+                break;
+            case Attribute::Index::ClipDistances0123:
+            case Attribute::Index::ClipDistances4567:
+                has_clip_distances = true;
+                break;
+            default:
+                break;
+            }
+        }
+
+        // Member types in declaration order
+        std::vector<Id> member_types;
+        member_types.push_back(t_float4);
+        if (has_point_size) {
+            member_types.push_back(t_float);
+        }
+        if (has_clip_distances) {
+            member_types.push_back(TypeArray(t_float, Constant(t_uint, 8)));
+        }
+
+        const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
+        Decorate(per_vertex_struct, spv::Decoration::Block);
+
+        // Names and decorates the next struct member as a built-in when it is present and
+        // returns its member index. Absent members return zero without consuming an index.
+        u32 next_member = 0;
+        const auto DecorateMember = [&](spv::BuiltIn builtin, std::string name, bool present) {
+            if (!present) {
+                return u32{};
+            }
+            const u32 member = next_member++;
+            MemberName(per_vertex_struct, member, name);
+            MemberDecorate(per_vertex_struct, member, spv::Decoration::BuiltIn,
+                           static_cast<u32>(builtin));
+            return member;
+        };
+
+        position_index = DecorateMember(spv::BuiltIn::Position, "position", true);
+        point_size_index = DecorateMember(spv::BuiltIn::PointSize, "point_size", has_point_size);
+        clip_distances_index =
+            DecorateMember(spv::BuiltIn::ClipDistance, "clip_distances", has_clip_distances);
+
+        const Id pointer_type = TypePointer(spv::StorageClass::Output, per_vertex_struct);
+        per_vertex = OpVariable(pointer_type, spv::StorageClass::Output);
+        AddGlobalVariable(Name(per_vertex, "per_vertex"));
+        interfaces.push_back(per_vertex);
+    }
+
+    /// Decompiles every node of a block in order, discarding the ids they return.
+    void VisitBasicBlock(const NodeBlock& bb) {
+        for (const Node entry : bb) {
+            Visit(entry);
+        }
+    }
+
+    /// Decompiles a node, emitting the code it needs, and returns the id of its value.
+    /// Operations, registers, immediates, predicates, input attributes, constant buffers and
+    /// global memory reads return a value id. Conditional and comment nodes only emit code and
+    /// return a null id. Unsupported inputs are reported and also return a null id.
+    /// @param node Node to decompile.
+    /// @returns Id of the resulting value, or a null id when the node yields no value.
+    Id Visit(Node node) {
+        if (const auto operation = std::get_if<OperationNode>(node)) {
+            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+            const auto decompiler = operation_decompilers[operation_index];
+            if (decompiler == nullptr) {
+                UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+                // Never call through a null member function pointer
+                return {};
+            }
+            return (this->*decompiler)(*operation);
+
+        } else if (const auto gpr = std::get_if<GprNode>(node)) {
+            const u32 index = gpr->GetIndex();
+            if (index == Register::ZeroIndex) {
+                return Constant(t_float, 0.0f);
+            }
+            return Emit(OpLoad(t_float, registers.at(index)));
+
+        } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+            return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue()));
+
+        } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+            const auto value = [&]() -> Id {
+                switch (const auto index = predicate->GetIndex(); index) {
+                case Tegra::Shader::Pred::UnusedIndex:
+                    return v_true;
+                case Tegra::Shader::Pred::NeverExecute:
+                    return v_false;
+                default:
+                    return Emit(OpLoad(t_bool, predicates.at(index)));
+                }
+            }();
+            if (predicate->IsNegated()) {
+                return Emit(OpLogicalNot(t_bool, value));
+            }
+            return value;
+
+        } else if (const auto abuf = std::get_if<AbufNode>(node)) {
+            const auto attribute = abuf->GetIndex();
+            const auto element = abuf->GetElement();
+
+            switch (attribute) {
+            case Attribute::Index::Position:
+                if (stage != ShaderStage::Fragment) {
+                    UNIMPLEMENTED();
+                    break;
+                } else {
+                    if (element == 3) {
+                        return Constant(t_float, 1.0f);
+                    }
+                    return Emit(OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)));
+                }
+            case Attribute::Index::TessCoordInstanceIDVertexID:
+                // TODO(Subv): Find out what the values are for the first two elements when inside a
+                // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+                // shader.
+                ASSERT(stage == ShaderStage::Vertex);
+                switch (element) {
+                case 2:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index)));
+                case 3:
+                    return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, vertex_index)));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+                return Constant(t_float, 0);
+            case Attribute::Index::FrontFacing:
+                // TODO(Subv): Find out what the values are for the other elements.
+                ASSERT(stage == ShaderStage::Fragment);
+                if (element == 3) {
+                    const Id is_front_facing = Emit(OpLoad(t_bool, front_facing));
+                    const Id true_value =
+                        BitcastTo<Type::Float>(Constant(t_int, static_cast<s32>(-1)));
+                    const Id false_value = BitcastTo<Type::Float>(Constant(t_int, 0));
+                    return Emit(OpSelect(t_float, is_front_facing, true_value, false_value));
+                }
+                UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+                return Constant(t_float, 0.0f);
+            default:
+                if (IsGenericAttribute(attribute)) {
+                    const Id pointer =
+                        AccessElement(t_in_float, input_attributes.at(attribute), element);
+                    return Emit(OpLoad(t_float, pointer));
+                }
+                break;
+            }
+            UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+            // Return here so an unhandled attribute is not also reported as an unreachable node
+            return {};
+
+        } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+            const Node offset = cbuf->GetOffset();
+            const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
+
+            Id buffer_index{};
+            Id buffer_element{};
+
+            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+                // Direct access
+                const u32 offset_imm = immediate->GetValue();
+                ASSERT(offset_imm % 4 == 0);
+                buffer_index = Constant(t_uint, offset_imm / 16);
+                buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
+
+            } else if (std::holds_alternative<OperationNode>(*offset)) {
+                // Indirect access
+                // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
+                // emits sub-optimal code on GLSL from my testing).
+                const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
+                const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
+                // Wrap the offset with the element count itself. Using "count - 1" (a bitmask
+                // value) as a modulus would make the last index unreachable.
+                const Id final_offset = Emit(
+                    OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)));
+                buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
+                buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
+
+            } else {
+                UNREACHABLE_MSG("Unmanaged offset node type");
+                // Do not build an access chain out of null indices
+                return {};
+            }
+
+            const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
+                                                  buffer_index, buffer_element));
+            return Emit(OpLoad(t_float, pointer));
+
+        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
+            const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress()));
+            const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress()));
+
+            // Element index: distance in bytes from the base address, divided by 4
+            Id offset = Emit(OpISub(t_uint, real, base));
+            offset = Emit(OpUDiv(t_uint, offset, Constant(t_uint, 4u)));
+            return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer,
+                                                           Constant(t_uint, 0u), offset))));
+
+        } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+            // It's invalid to call conditional on nested nodes, use an operation instead
+            const Id true_label = OpLabel();
+            const Id skip_label = OpLabel();
+            // The condition is evaluated first because OpSelectionMerge has to be the instruction
+            // immediately preceding the conditional branch it belongs to.
+            const Id condition = Visit(conditional->GetCondition());
+            Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
+            Emit(OpBranchConditional(condition, true_label, skip_label));
+            Emit(true_label);
+
+            VisitBasicBlock(conditional->GetCode());
+
+            Emit(OpBranch(skip_label));
+            Emit(skip_label);
+            return {};
+
+        } else if (const auto comment = std::get_if<CommentNode>(node)) {
+            Name(Emit(OpUndef(t_void)), comment->GetText());
+            return {};
+        }
+
+        UNREACHABLE();
+        return {};
+    }
+
+    /// Emits a one-operand instruction through the Module member function `func`.
+    /// The operand is visited as `type_a`, the instruction result has type `result_type`, and the
+    /// returned id is passed through BitcastFrom<result_type>. Precise operations are decorated
+    /// with NoContraction.
+    template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
+    Id Unary(Operation operation) {
+        const Id result_type_id = GetTypeDefinition(result_type);
+        const Id operand = VisitOperand<type_a>(operation, 0);
+
+        const Id raw = Emit((this->*func)(result_type_id, operand));
+        const Id result = BitcastFrom<result_type>(raw);
+        if (!IsPrecise(operation)) {
+            return result;
+        }
+        Decorate(result, spv::Decoration::NoContraction);
+        return result;
+    }
+
+    /// Emits a two-operand instruction through the Module member function `func`.
+    /// Operands are visited in order as `type_a` and `type_b`, the instruction result has type
+    /// `result_type`, and the returned id is passed through BitcastFrom<result_type>. Precise
+    /// operations are decorated with NoContraction.
+    template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a>
+    Id Binary(Operation operation) {
+        const Id result_type_id = GetTypeDefinition(result_type);
+        const Id first = VisitOperand<type_a>(operation, 0);
+        const Id second = VisitOperand<type_b>(operation, 1);
+
+        const Id raw = Emit((this->*func)(result_type_id, first, second));
+        const Id result = BitcastFrom<result_type>(raw);
+        if (!IsPrecise(operation)) {
+            return result;
+        }
+        Decorate(result, spv::Decoration::NoContraction);
+        return result;
+    }
+
+    /// Emits a three-operand instruction through the Module member function `func`.
+    /// Operands are visited in order as `type_a`, `type_b` and `type_c`, the instruction result
+    /// has type `result_type`, and the returned id is passed through BitcastFrom<result_type>.
+    /// Precise operations are decorated with NoContraction.
+    template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b>
+    Id Ternary(Operation operation) {
+        const Id result_type_id = GetTypeDefinition(result_type);
+        const Id first = VisitOperand<type_a>(operation, 0);
+        const Id second = VisitOperand<type_b>(operation, 1);
+        const Id third = VisitOperand<type_c>(operation, 2);
+
+        const Id raw = Emit((this->*func)(result_type_id, first, second, third));
+        const Id result = BitcastFrom<result_type>(raw);
+        if (!IsPrecise(operation)) {
+            return result;
+        }
+        Decorate(result, spv::Decoration::NoContraction);
+        return result;
+    }
+
+    /// Emits a four-operand instruction through the Module member function `func`.
+    /// Operands are visited in order as `type_a` to `type_d`, the instruction result has type
+    /// `result_type`, and the returned id is passed through BitcastFrom<result_type>. Precise
+    /// operations are decorated with NoContraction.
+    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
+              Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
+    Id Quaternary(Operation operation) {
+        const Id result_type_id = GetTypeDefinition(result_type);
+        const Id first = VisitOperand<type_a>(operation, 0);
+        const Id second = VisitOperand<type_b>(operation, 1);
+        const Id third = VisitOperand<type_c>(operation, 2);
+        const Id fourth = VisitOperand<type_d>(operation, 3);
+
+        const Id raw = Emit((this->*func)(result_type_id, first, second, third, fourth));
+        const Id result = BitcastFrom<result_type>(raw);
+        if (!IsPrecise(operation)) {
+            return result;
+        }
+        Decorate(result, spv::Decoration::NoContraction);
+        return result;
+    }
+
+    /// Stores the value of the second operand into the destination named by the first operand.
+    /// Supported destinations are registers, output attributes and local memory. Writes to
+    /// Register::ZeroIndex are discarded. Unsupported destinations and unhandled output
+    /// attributes are reported and skipped, so no store through a null id is emitted.
+    /// @param operation Operation whose operand 0 is the destination and operand 1 the source.
+    /// @returns A null id.
+    Id Assign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
+
+        Id target{};
+        if (const auto gpr = std::get_if<GprNode>(dest)) {
+            if (gpr->GetIndex() == Register::ZeroIndex) {
+                // Writing to Register::ZeroIndex is a no op
+                return {};
+            }
+            target = registers.at(gpr->GetIndex());
+
+        } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+            const auto attribute = abuf->GetIndex();
+            const auto element = abuf->GetElement();
+
+            switch (attribute) {
+            case Attribute::Index::Position:
+                target = AccessElement(t_out_float, per_vertex, position_index, element);
+                break;
+            case Attribute::Index::PointSize:
+                target = AccessElement(t_out_float, per_vertex, point_size_index);
+                break;
+            case Attribute::Index::ClipDistances0123:
+                target = AccessElement(t_out_float, per_vertex, clip_distances_index, element);
+                break;
+            case Attribute::Index::ClipDistances4567:
+                // Second half of the clip distances array
+                target = AccessElement(t_out_float, per_vertex, clip_distances_index, element + 4);
+                break;
+            default:
+                if (!IsGenericAttribute(attribute)) {
+                    UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
+                                      static_cast<u32>(attribute));
+                    return {};
+                }
+                target = AccessElement(t_out_float, output_attributes.at(attribute), element);
+                break;
+            }
+
+        } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+            // The address is divided by 4 to index the local memory array
+            Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress()));
+            address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4)));
+            target = Emit(OpAccessChain(t_prv_float, local_memory, {address}));
+
+        } else {
+            UNIMPLEMENTED_MSG("Unhandled assignment destination");
+            return {};
+        }
+
+        Emit(OpStore(target, Visit(src)));
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id HNegate(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id HMergeF32(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id HMergeH0(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id HMergeH1(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id HPack2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Stores the value of the second operand into a predicate or an internal flag.
+    /// Writes to the NeverExecute and UnusedIndex predicates are discarded. Unsupported
+    /// destinations are reported and skipped, so no store through a null id is emitted.
+    /// @param operation Operation whose operand 0 is the destination and operand 1 the source.
+    /// @returns A null id.
+    Id LogicalAssign(Operation operation) {
+        const Node dest = operation[0];
+        const Node src = operation[1];
+
+        Id target{};
+        if (const auto pred = std::get_if<PredicateNode>(dest)) {
+            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+            case Tegra::Shader::Pred::UnusedIndex:
+                // Writing to these predicates is a no-op
+                return {};
+            default:
+                break;
+            }
+            target = predicates.at(index);
+
+        } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+            target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
+
+        } else {
+            UNIMPLEMENTED_MSG("Unhandled logical assignment destination");
+            return {};
+        }
+
+        Emit(OpStore(target, Visit(src)));
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id LogicalPick2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id LogicalAll2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id LogicalAny2(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Loads the sampled image declared for the sampler referenced by the operation metadata.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture.
+    /// @returns Id of the loaded sampled image.
+    Id GetTextureSampler(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        // Bind by reference, the entry does not need to be copied
+        const auto& entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
+        return Emit(OpLoad(entry.sampled_image_type, entry.sampler));
+    }
+
+    /// Extracts the image from the sampled image of the sampler referenced by the operation.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture.
+    /// @returns Id of the image.
+    Id GetTextureImage(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        // Bind by reference, the entry does not need to be copied
+        const auto& entry = sampler_images.at(static_cast<u32>(meta->sampler.GetIndex()));
+        return Emit(OpImage(entry.image_type, GetTextureSampler(operation)));
+    }
+
+    /// Builds the coordinate value of a texture operation.
+    /// The coordinates are the visited operands, followed by the array index converted to float
+    /// for array samplers and by the depth compare value for shadow samplers.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture.
+    /// @returns The single coordinate itself, or a float vector composed of all of them.
+    Id GetTextureCoordinates(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        std::vector<Id> coords;
+        for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
+            coords.push_back(Visit(operation[i]));
+        }
+        if (meta->sampler.IsArray()) {
+            const Id array_integer = BitcastTo<Type::Int>(Visit(meta->array));
+            coords.push_back(Emit(OpConvertSToF(t_float, array_integer)));
+        }
+        if (meta->sampler.IsShadow()) {
+            coords.push_back(Visit(meta->depth_compare));
+        }
+
+        // Vector type indexed by "component count - 1"; a single coordinate is used as is
+        const std::array<Id, 4> t_float_lut = {nullptr, t_float2, t_float3, t_float4};
+        return coords.size() == 1
+                   ? coords[0]
+                   : Emit(OpCompositeConstruct(t_float_lut.at(coords.size() - 1), coords));
+    }
+
+    /// Extracts from a sampled value the component selected by the operation metadata.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture.
+    /// @param sample_value Id of the composite returned by the sampling instruction.
+    /// @returns Id of the extracted float component.
+    Id GetTextureElement(Operation operation, Id sample_value) {
+        const auto texture_meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(texture_meta);
+
+        const Id component = Emit(OpCompositeExtract(t_float, sample_value, texture_meta->element));
+        return component;
+    }
+
+    /// Samples a texture with implicit level of detail and returns the requested component.
+    /// @param operation Texture operation to decompile.
+    /// @returns Id of the float component selected by the operation metadata.
+    Id Texture(Operation operation) {
+        // Both helpers emit instructions. Call them in separate statements: the evaluation order
+        // of function arguments is unspecified, which would make the emitted code depend on the
+        // compiler.
+        const Id sampler = GetTextureSampler(operation);
+        const Id coords = GetTextureCoordinates(operation);
+
+        const Id texture = Emit(OpImageSampleImplicitLod(t_float4, sampler, coords));
+        return GetTextureElement(operation, texture);
+    }
+
+    /// Samples a texture with an explicit level of detail and returns the requested component.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture with a lod.
+    /// @returns Id of the float component selected by the operation metadata.
+    Id TextureLod(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        // These calls emit instructions. Call them in separate statements: the evaluation order
+        // of function arguments is unspecified, which would make the emitted code depend on the
+        // compiler.
+        const Id sampler = GetTextureSampler(operation);
+        const Id coords = GetTextureCoordinates(operation);
+        const Id lod = Visit(meta->lod);
+
+        const Id texture = Emit(OpImageSampleExplicitLod(t_float4, sampler, coords,
+                                                         spv::ImageOperandsMask::Lod, lod));
+        return GetTextureElement(operation, texture);
+    }
+
+    /// Emits a texture gather and returns the requested component of its result.
+    /// Shadow samplers use OpImageDrefGather with the visited component node. Other samplers use
+    /// OpImageGather with an immediate component, which defaults to zero when none is given.
+    /// @param operation Texture operation; its meta is expected to hold a MetaTexture.
+    /// @returns Id of the float component selected by the operation metadata.
+    Id TextureGather(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        // Emitting helpers are called in separate statements so the order of the emitted
+        // instructions does not depend on the argument evaluation order of the compiler.
+        const auto coords = GetTextureCoordinates(operation);
+        const Id sampler = GetTextureSampler(operation);
+
+        Id texture{};
+        if (meta->sampler.IsShadow()) {
+            const Id component = Visit(meta->component);
+            texture = Emit(OpImageDrefGather(t_float4, sampler, coords, component));
+        } else {
+            u32 component_value = 0;
+            if (meta->component) {
+                const auto component = std::get_if<ImmediateNode>(meta->component);
+                ASSERT_MSG(component, "Component is not an immediate value");
+                // Keep the default component instead of dereferencing a null pointer
+                if (component) {
+                    component_value = component->GetValue();
+                }
+            }
+            texture =
+                Emit(OpImageGather(t_float4, sampler, coords, Constant(t_uint, component_value)));
+        }
+
+        return GetTextureElement(operation, texture);
+    }
+
+    /// Queries the size of a texture and returns the element selected by the metadata as float
+    /// bits. Element 3 returns the number of levels. Elements beyond the number of coordinates
+    /// of the texture type return zero.
+    /// @param operation Texture operation; operand 0 is the level of detail and its meta is
+    /// expected to hold a MetaTexture.
+    /// @returns Id of the queried value.
+    Id TextureQueryDimensions(Operation operation) {
+        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
+        ASSERT(meta);
+
+        const auto image_id = GetTextureImage(operation);
+        AddCapability(spv::Capability::ImageQuery);
+
+        if (meta->element == 3) {
+            return BitcastTo<Type::Float>(Emit(OpImageQueryLevels(t_int, image_id)));
+        }
+
+        const Id lod = VisitOperand<Type::Uint>(operation, 0);
+        const std::size_t coords_count = [&]() -> std::size_t {
+            switch (const auto type = meta->sampler.GetType(); type) {
+            case Tegra::Shader::TextureType::Texture1D:
+                return 1;
+            case Tegra::Shader::TextureType::Texture2D:
+            case Tegra::Shader::TextureType::TextureCube:
+                return 2;
+            case Tegra::Shader::TextureType::Texture3D:
+                return 3;
+            default:
+                UNREACHABLE_MSG("Invalid texture type={}", static_cast<u32>(type));
+                return 2;
+            }
+        }();
+
+        if (meta->element >= coords_count) {
+            return Constant(t_float, 0.0f);
+        }
+
+        // Result type of the size query, indexed by "coordinate count - 1"
+        const std::array<Id, 3> types = {t_int, t_int2, t_int3};
+        const Id sizes = Emit(OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod));
+        const Id size = Emit(OpCompositeExtract(t_int, sizes, meta->element));
+        return BitcastTo<Type::Float>(size);
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id TextureQueryLod(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Not implemented yet: flags the operation with UNIMPLEMENTED() and returns a null id.
+    Id TexelFetch(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    /// Emits a jump to an immediate address: the address is stored in jmp_to and control is sent
+    /// to continue_label. Branches whose target is not an immediate are not implemented; they
+    /// are reported and no code is emitted for them.
+    /// @param operation Branch operation whose operand 0 is the target address.
+    /// @returns A null id.
+    Id Branch(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        if (!target) {
+            // Bail out instead of dereferencing a null pointer below
+            UNIMPLEMENTED();
+            return {};
+        }
+
+        Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
+        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        return {};
+    }
+
+    /// Pushes an immediate address onto the flow stack: the address is written at the current
+    /// top of the stack and the top is then advanced by one.
+    /// @param operation Operation whose operand 0 is the immediate address to push.
+    /// @returns A null id.
+    Id PushFlowStack(Operation operation) {
+        const auto target = std::get_if<ImmediateNode>(operation[0]);
+        ASSERT(target);
+
+        const Id top = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id new_top = Emit(OpIAdd(t_uint, top, Constant(t_uint, 1)));
+        const Id slot = Emit(OpAccessChain(t_func_uint, flow_stack, top));
+
+        const Id address = Constant(t_uint, target->GetValue());
+        Emit(OpStore(slot, address));
+        Emit(OpStore(flow_stack_top, new_top));
+        return {};
+    }
+
+    /// Pops an address from the flow stack and jumps to it: the top of the stack is decremented,
+    /// the address stored there is written to jmp_to and control is sent to continue_label.
+    /// @returns A null id.
+    Id PopFlowStack(Operation operation) {
+        const Id top = Emit(OpLoad(t_uint, flow_stack_top));
+        const Id new_top = Emit(OpISub(t_uint, top, Constant(t_uint, 1)));
+        const Id slot = Emit(OpAccessChain(t_func_uint, flow_stack, new_top));
+        const Id address = Emit(OpLoad(t_uint, slot));
+
+        Emit(OpStore(flow_stack_top, new_top));
+        Emit(OpStore(jmp_to, address));
+
+        const auto jump_to_dispatcher = [&]() { Emit(OpBranch(continue_label)); };
+        BranchingOp(jump_to_dispatcher);
+        return {};
+    }
+
+    Id Exit(Operation operation) { // Writes the stage's final outputs, then emits OpReturn.
+        switch (stage) {
+        case ShaderStage::Vertex: {
+            // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
+            // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
+            // Until then z is rewritten as (z + 1) * 0.5. OpAccessChain needs a pointer result
+            // type, so the position is addressed with t_out_float4 instead of t_float4.
+            const Id position = AccessElement(t_out_float4, per_vertex, position_index);
+            Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2)));
+            depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
+            depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
+            Emit(OpStore(AccessElement(t_out_float, position, 2), depth));
+            break;
+        }
+        case ShaderStage::Fragment: {
+            // Loads a register as float, or yields 0.0f when it has no entry in `registers`.
+            const auto SafeGetRegister = [&](u32 reg) {
+                // TODO(Rodrigo): Replace with contains once C++20 releases
+                if (const auto it = registers.find(reg); it != registers.end()) {
+                    return Emit(OpLoad(t_float, it->second));
+                }
+                return Constant(t_float, 0.0f);
+            };
+            UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
+                                 "Sample mask write is unimplemented");
+            // TODO(Rodrigo): Alpha testing
+            // Write the color outputs using the data in the shader registers, disabled
+            // rendertargets/components are skipped in the register assignment.
+            u32 current_reg = 0;
+            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
+                for (u32 component = 0; component < 4; ++component) {
+                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
+                        Emit(OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
+                                     SafeGetRegister(current_reg)));
+                        ++current_reg;
+                    }
+                }
+            }
+            if (header.ps.omap.depth) {
+                // The depth output is always 2 registers after the last color output, and
+                // current_reg already contains one past the last color register.
+                Emit(OpStore(frag_depth, SafeGetRegister(current_reg + 1)));
+            }
+            break;
+        }
+        } // Other stages have nothing extra to write.
+
+        BranchingOp([&]() { Emit(OpReturn()); });
+        return {};
+    }
+
+    Id Discard(Operation operation) { // Emits OpKill through BranchingOp.
+        BranchingOp([this] { Emit(OpKill()); });
+        return Id{};
+    }
+
+    Id EmitVertex(Operation operation) { // Placeholder: the operation is not handled yet.
+        UNIMPLEMENTED();
+        return {}; // Default-constructed Id.
+    }
+
+    Id EndPrimitive(Operation operation) { // Placeholder: the operation is not handled yet.
+        UNIMPLEMENTED();
+        return {}; // Default-constructed Id.
+    }
+
+    Id YNegate(Operation operation) { // Placeholder: the operation is not handled yet.
+        UNIMPLEMENTED();
+        return {}; // Default-constructed Id.
+    }
+
+    Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
+                      const std::string& name) { // Declares a named, BuiltIn-decorated global.
+        const Id variable = OpVariable(type, storage);
+        Decorate(variable, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
+        AddGlobalVariable(Name(variable, name));
+        interfaces.push_back(variable); // Every built-in is also recorded in `interfaces`.
+        return variable;
+    }
+
+    bool IsRenderTargetUsed(u32 rt) const { // True when any of rt's four components is enabled.
+        u32 component = 0;
+        while (component < 4 && !header.ps.IsColorComponentOutputEnabled(rt, component)) {
+            ++component;
+        }
+        // Stopping before 4 means an enabled component was found.
+        return component < 4;
+    }
+
+    template <typename... Args>
+    Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
+        // Every index becomes a uint constant, in argument order, for a single OpAccessChain.
+        const auto indices = {elements_...};
+        std::vector<Id> chain;
+        for (auto it = indices.begin(); it != indices.end(); ++it) {
+            chain.push_back(Constant(t_uint, *it));
+        }
+        return Emit(OpAccessChain(pointer_type, composite, chain));
+    }
+
+    template <Type type>
+    Id VisitOperand(Operation operation, std::size_t operand_index) {
+        // Visits one operand; for Int and Uint the visited value is bitcast to that type.
+        const Id visited = Visit(operation[operand_index]);
+
+        if constexpr (type == Type::Bool || type == Type::Bool2 || type == Type::Float) {
+            return visited;
+        } else if constexpr (type == Type::Int) {
+            return Emit(OpBitcast(t_int, visited));
+        } else if constexpr (type == Type::Uint) {
+            return Emit(OpBitcast(t_uint, visited));
+        } else {
+            if constexpr (type == Type::HalfFloat) {
+                UNIMPLEMENTED();
+            }
+            UNREACHABLE();
+            return visited;
+        }
+    }
+
+    template <Type type>
+    Id BitcastFrom(Id value) {
+        // Int and Uint values are bitcast to float; Bool, Bool2 and Float are returned as-is.
+        constexpr bool is_integer = type == Type::Int || type == Type::Uint;
+        if constexpr (is_integer) {
+            return Emit(OpBitcast(t_float, value));
+        } else if constexpr (type == Type::Bool || type == Type::Bool2 || type == Type::Float) {
+            return value;
+        } else {
+            if constexpr (type == Type::HalfFloat) {
+                UNIMPLEMENTED();
+            }
+            UNREACHABLE();
+            return value;
+        }
+    }
+
+    template <Type type>
+    Id BitcastTo(Id value) { // Emits an OpBitcast of `value` to the type named by `type`.
+        switch (type) {
+        case Type::Bool:
+        case Type::Bool2:
+            UNREACHABLE(); // NOTE(review): no return here; falls into Float if the macro returns.
+        case Type::Float:
+            return Emit(OpBitcast(t_float, value));
+        case Type::Int:
+            return Emit(OpBitcast(t_int, value));
+        case Type::Uint:
+            return Emit(OpBitcast(t_uint, value));
+        case Type::HalfFloat:
+            UNIMPLEMENTED(); // Leaves the switch and reaches the UNREACHABLE() below.
+        }
+        UNREACHABLE();
+        return value; // Fallback: the input is handed back unchanged.
+    }
+
+    Id GetTypeDefinition(Type type) { // Resolves the t_* id declared for an operand type.
+        switch (type) {
+        case Type::Float:
+            return t_float;
+        case Type::Int:
+            return t_int;
+        case Type::Uint:
+            return t_uint;
+        case Type::Bool:
+            return t_bool;
+        case Type::Bool2:
+            return t_bool2;
+        case Type::HalfFloat:
+            UNIMPLEMENTED();
+        }
+        UNREACHABLE();
+        return Id{};
+    }
+
+    void BranchingOp(std::function<void()> call) {
+        // Runs `call` inside an always-taken selection; later code continues at merge_label.
+        const Id taken_label = OpLabel();
+        const Id merge_label = OpLabel();
+        Emit(OpSelectionMerge(merge_label, spv::SelectionControlMask::Flatten));
+        Emit(OpBranchConditional(v_true, taken_label, merge_label, 1, 0));
+        Emit(taken_label);
+        call();
+        Emit(merge_label);
+    }
+
+    static constexpr OperationDecompilersArray operation_decompilers = {
+        &SPIRVDecompiler::Assign, // Order presumably mirrors OperationCode - TODO confirm.
+
+        &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
+                                  Type::Float>, // Float select driven by a bool.
+
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, // Float operations.
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
+        &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, // Signed integer operations.
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
+
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
+
+        &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, // Unsigned integer operations.
+        &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
+        &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
+        &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
+        &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, // Half float operations.
+        &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
+        &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
+        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
+        &SPIRVDecompiler::HNegate,
+        &SPIRVDecompiler::HMergeF32,
+        &SPIRVDecompiler::HMergeH0,
+        &SPIRVDecompiler::HMergeH1,
+        &SPIRVDecompiler::HPack2,
+
+        &SPIRVDecompiler::LogicalAssign, // Boolean operations.
+        &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
+        &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
+        &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
+        &SPIRVDecompiler::LogicalPick2,
+        &SPIRVDecompiler::LogicalAll2,
+        &SPIRVDecompiler::LogicalAny2,
+
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
+        &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool>,
+
+        &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
+        &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
+
+        &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
+        &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
+
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
+        &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
+
+        &SPIRVDecompiler::Texture, // Texture operations.
+        &SPIRVDecompiler::TextureLod,
+        &SPIRVDecompiler::TextureGather,
+        &SPIRVDecompiler::TextureQueryDimensions,
+        &SPIRVDecompiler::TextureQueryLod,
+        &SPIRVDecompiler::TexelFetch,
+
+        &SPIRVDecompiler::Branch, // Control flow.
+        &SPIRVDecompiler::PushFlowStack,
+        &SPIRVDecompiler::PopFlowStack,
+        &SPIRVDecompiler::Exit,
+        &SPIRVDecompiler::Discard,
+
+        &SPIRVDecompiler::EmitVertex, // Currently an UNIMPLEMENTED() stub.
+        &SPIRVDecompiler::EndPrimitive, // Currently an UNIMPLEMENTED() stub.
+
+        &SPIRVDecompiler::YNegate, // Currently an UNIMPLEMENTED() stub.
+    };
+
+    const ShaderIR& ir; // Held by reference.
+    const ShaderStage stage;
+    const Tegra::Shader::Header header; // Stored by value.
+
+    const Id t_void = Name(TypeVoid(), "void"); // The t_* ids below are named type declarations.
+
+    const Id t_bool = Name(TypeBool(), "bool");
+    const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
+
+    const Id t_int = Name(TypeInt(32, true), "int"); // 32-bit signed.
+    const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
+    const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
+    const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
+
+    const Id t_uint = Name(TypeInt(32, false), "uint"); // 32-bit unsigned.
+    const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
+    const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
+    const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
+
+    const Id t_float = Name(TypeFloat(32), "float");
+    const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
+    const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
+    const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
+
+    const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
+    const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
+
+    const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
+
+    const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
+    const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
+    const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
+    const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
+
+    const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
+    const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
+
+    const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
+    const Id t_cbuf_array = // Array of float4 elements decorated with a CBUF_STRIDE stride.
+        Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
+                 spv::Decoration::ArrayStride, CBUF_STRIDE);
+    const Id t_cbuf_struct = MemberDecorate( // Block struct whose only member sits at offset 0.
+        Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);
+
+    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
+    const Id t_gmem_array = // Runtime-sized array of floats with a stride of 4.
+        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4u), "GmemArray");
+    const Id t_gmem_struct = MemberDecorate( // Block struct whose only member sits at offset 0.
+        Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+
+    const Id v_float_zero = Constant(t_float, 0.0f); // Shared constants.
+    const Id v_true = ConstantTrue(t_bool);
+    const Id v_false = ConstantFalse(t_bool);
+
+    Id per_vertex{}; // Exit() indexes this with position_index to reach the position.
+    std::map<u32, Id> registers; // Exit() loads these as floats, keyed by register number.
+    std::map<Tegra::Shader::Pred, Id> predicates;
+    Id local_memory{};
+    std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
+    std::map<Attribute::Index, Id> input_attributes;
+    std::map<Attribute::Index, Id> output_attributes;
+    std::map<u32, Id> constant_buffers;
+    std::map<GlobalMemoryBase, Id> global_buffers;
+    std::map<u32, SamplerImage> sampler_images;
+
+    Id instance_index{};
+    Id vertex_index{};
+    std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; // One color output per render target.
+    Id frag_depth{};
+    Id frag_coord{};
+    Id front_facing{};
+
+    u32 position_index{}; // Member index passed to AccessElement on per_vertex.
+    u32 point_size_index{};
+    u32 clip_distances_index{};
+
+    std::vector<Id> interfaces; // DeclareBuiltIn() appends every built-in variable here.
+
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+
+    Id execute_function{};
+    Id jmp_to{}; // Variable holding the next address; written by Branch and PopFlowStack.
+    Id flow_stack_top{}; // Index of the next free flow_stack slot.
+    Id flow_stack{}; // Array of uint addresses used by PushFlowStack and PopFlowStack.
+    Id continue_label{}; // Label that Branch and PopFlowStack jump to after updating jmp_to.
+    std::map<u32, Id> labels;
+};
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
+    auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); // Owns the generated module.
+    decompiler->Decompile();
+    ShaderEntries entries = decompiler->GetShaderEntries(); // Read before ownership moves.
+    return {std::move(decompiler), std::move(entries)};
+}
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
new file mode 100644
index 000000000..329d8fa38
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+}
+
+namespace Vulkan::VKShader {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using SamplerEntry = VideoCommon::Shader::Sampler;
+
+constexpr u32 DESCRIPTOR_SET = 0;
+
+// A shader const buffer description paired with the index it was constructed with.
+class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
+    using Base = VideoCommon::Shader::ConstBuffer;
+    u32 index{};
+
+public:
+    explicit constexpr ConstBufferEntry(const Base& entry, u32 index)
+        : Base(entry), index(index) {}
+    constexpr u32 GetIndex() const {
+        return this->index;
+    }
+};
+
+// Const buffer index and offset pair kept for one global buffer entry.
+class GlobalBufferEntry {
+    u32 cbuf_index{};
+    u32 cbuf_offset{};
+
+public:
+    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index(cbuf_index), cbuf_offset(cbuf_offset) {}
+
+    u32 GetCbufIndex() const {
+        return this->cbuf_index;
+    }
+
+    u32 GetCbufOffset() const {
+        return this->cbuf_offset;
+    }
+};
+
+struct ShaderEntries { // Second half of DecompilerResult, returned next to the module.
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+    std::vector<ConstBufferEntry> const_buffers;
+    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<SamplerEntry> samplers;
+    std::set<u32> attributes;
+    std::array<bool, Maxwell::NumClipDistances> clip_distances{}; // One flag per clip distance.
+    std::size_t shader_length{};
+    Sirit::Id entry_function{};
+    std::vector<Sirit::Id> interfaces; // presumably the entry point interface ids - TODO confirm
+};
+
+using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+// Decompiles `ir` for `stage`; returns the generated module together with its entries.
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+
+} // namespace Vulkan::VKShader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c34843307..db15c0718 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     const bool is_signed_b = instr.xmad.sign_b == 1;
     const bool is_signed_c = is_signed_a;
 
-    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
+    auto [is_merge, is_psl, is_high_b, mode, op_b,
+          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::XMAD_CR:
             return {instr.xmad.merge_56,
+                    instr.xmad.product_shift_left_second,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                     GetRegister(instr.gpr39)};
         case OpCode::Id::XMAD_RR:
-            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+                    instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)};
         case OpCode::Id::XMAD_RC:
-            return {false, GetRegister(instr.gpr39),
+            return {false,
+                    false,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetRegister(instr.gpr39),
                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
         case OpCode::Id::XMAD_IMM:
-            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+            return {instr.xmad.merge_37,
+                    instr.xmad.product_shift_left,
+                    false,
+                    instr.xmad.mode,
+                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                     GetRegister(instr.gpr39)};
         }
         UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
-        return {false, Immediate(0), Immediate(0)};
+        return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
     }();
 
     op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
 
     const Node original_b = op_b;
-    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
+    op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
 
     // TODO(Rodrigo): Use an appropiate sign for this operation
     Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
-    if (instr.xmad.product_shift_left) {
+    if (is_psl) {
         product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
     }
+    SetTemporal(bb, 0, product);
+    product = GetTemporal(0);
 
     const Node original_c = op_c;
+    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
     op_c = [&]() {
-        switch (instr.xmad.mode) {
+        switch (set_mode) {
         case Tegra::Shader::XmadMode::None:
             return original_c;
         case Tegra::Shader::XmadMode::CLo:
@@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
         }
     }();
 
+    SetTemporal(bb, 1, op_c);
+    op_c = GetTemporal(1);
+
     // TODO(Rodrigo): Use an appropiate sign for this operation
     Node sum = Operation(OperationCode::IAdd, product, op_c);
+    SetTemporal(bb, 2, sum);
+    sum = GetTemporal(2);
     if (is_merge) {
         const Node a = BitfieldExtract(sum, 0, 16);
         const Node b =
@@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-} // namespace VideoCommon::Shader
-\ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp
new file mode 100644
index 000000000..e96eba7cc
--- /dev/null
+++ b/src/video_core/texture_cache.cpp
@@ -0,0 +1,386 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache.h"
+#include "video_core/textures/decoders.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using VideoCore::Surface::SurfaceTarget;
+
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTargetFromTextureType;
+
+constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { // Ceil-divides by tile.
+    return !uncompressed ? std::max(1U, (mip_size + tile - 1) / tile) : mip_size;
+}
+
+SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
+                                              const Tegra::Texture::FullTextureInfo& config) {
+    // Builds the parameters for a texture from its TIC entry (config.tic).
+    SurfaceParams params{}; // Value-initialized, matching CreateForFermiCopySurface.
+    params.is_tiled = config.tic.IsTiled(); // Untiled textures get zero block sizes below.
+    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
+    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
+    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
+    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
+    params.pixel_format =
+        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
+    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+    params.type = GetFormatType(params.pixel_format);
+    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
+    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    params.depth = config.tic.Depth();
+    if (params.target == SurfaceTarget::TextureCubemap ||
+        params.target == SurfaceTarget::TextureCubeArray) {
+        params.depth *= 6; // Cube targets multiply the depth by six.
+    }
+    params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
+    params.unaligned_height = config.tic.Height(); // Height before the AlignUp above.
+    params.num_levels = config.tic.max_mip_level + 1;
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+    u32 block_width, u32 block_height, u32 block_depth,
+    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+    // Single-level 2D depth surface; value-initialized because `pitch` is never assigned here.
+    SurfaceParams params{};
+    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << std::min(block_width, 5U); // Arguments are exponents, capped at 5.
+    params.block_height = 1 << std::min(block_height, 5U);
+    params.block_depth = 1 << std::min(block_depth, 5U);
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromDepthFormat(format);
+    params.component_type = ComponentTypeFromDepthFormat(format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = zeta_width;
+    params.height = zeta_height;
+    params.unaligned_height = zeta_height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
+    // Builds the parameters of render target `index` from the Maxwell3D registers.
+    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+    SurfaceParams params{}; // Value-initialized: `pitch` is only assigned for linear targets.
+    params.is_tiled =
+        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << config.memory_layout.block_width;
+    params.block_height = 1 << config.memory_layout.block_height;
+    params.block_depth = 1 << config.memory_layout.block_depth;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; // Bits converted to bytes.
+        params.pitch = config.width; // On this path the width register is used as the pitch.
+        params.width = params.pitch / bpp;
+    }
+    params.height = config.height;
+    params.depth = 1;
+    params.unaligned_height = config.height;
+    params.target = SurfaceTarget::Texture2D;
+    params.num_levels = 1;
+    params.CalculateCachedValues();
+    return params;
+}
+
+SurfaceParams SurfaceParams::CreateForFermiCopySurface(
+    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+    // Builds the parameters of a Fermi2D copy surface, always described as one 2D level.
+    SurfaceParams params{};
+    params.is_tiled = !config.linear;
+    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
+    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
+    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
+    params.tile_width_spacing = 1;
+    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.component_type = ComponentTypeFromRenderTarget(config.format);
+    params.type = GetFormatType(params.pixel_format);
+    params.width = config.width;
+    params.height = config.height;
+    params.unaligned_height = config.height;
+    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.num_levels = 1;
+    params.CalculateCachedValues();
+    return params;
+}
+
+u32 SurfaceParams::GetMipWidth(u32 level) const { // Width shifted right by level, at least 1.
+    return std::max(width >> level, 1U);
+}
+
+u32 SurfaceParams::GetMipHeight(u32 level) const { // Height shifted right by level, at least 1.
+    return std::max(height >> level, 1U);
+}
+
+u32 SurfaceParams::GetMipDepth(u32 level) const { // Layered targets keep depth at every level.
+    return !IsLayered() ? std::max(depth >> level, 1U) : depth;
+}
+
+bool SurfaceParams::IsLayered() const {
+    // True for the three array targets and for cubemaps;
+    // every other target is reported as not layered.
+
+    const bool is_array = target == SurfaceTarget::Texture1DArray ||
+                          target == SurfaceTarget::Texture2DArray ||
+                          target == SurfaceTarget::TextureCubeArray;
+    const bool is_cubemap = target == SurfaceTarget::TextureCubemap;
+
+    return is_array || is_cubemap;
+}
+
+u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
+    // Auto block resizing algorithm from:
+    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+    if (level == 0) {
+        return block_height;
+    }
+    const u32 mip_height{GetMipHeight(level)};
+    const u32 rows_per_block{GetDefaultBlockHeight(pixel_format)};
+    const u32 block_rows{(mip_height + rows_per_block - 1) / rows_per_block};
+    // Start from 16 and halve while the level spans at most four times the candidate.
+    u32 result = 16;
+    for (; result > 1 && block_rows <= result * 4; result >>= 1) {
+    }
+    return result;
+}
+
+u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
+    if (level == 0) {
+        return block_depth; // The base level uses the configured block depth as is
+    }
+    if (target != SurfaceTarget::Texture3D) {
+        return 1; // Only 3D textures get a block depth above one on the lower levels
+    }
+    const u32 mip_depth{GetMipDepth(level)};
+    u32 mip_block_depth{32};
+    while (mip_block_depth > 1 && mip_depth * 2 <= mip_block_depth) {
+        mip_block_depth /= 2; // Halve while the block is at least twice as deep as the level
+    }
+    // A full 32 deep block is reduced to 16 when the block height of the level is 4 or more.
+    const bool shrink{mip_block_depth == 32 && GetMipBlockHeight(level) >= 4};
+    return shrink ? 16 : mip_block_depth;
+}
+
+std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
+    std::size_t preceding_size{}; // Sum of the guest sizes of every level before `level`
+    for (u32 prior = 0; prior != level; ++prior) {
+        preceding_size += GetInnerMipmapMemorySize(prior, false, IsLayered(), false);
+    }
+    return preceding_size;
+}
+
+std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
+    std::size_t preceding_size{}; // Sum of the linear host sizes of every level before `level`
+    for (u32 prior = 0; prior != level; ++prior) {
+        preceding_size += GetInnerMipmapMemorySize(prior, true, false, false);
+    }
+    return preceding_size;
+}
+
+std::size_t SurfaceParams::GetGuestLayerSize() const {
+    return GetInnerMemorySize(false, true, false); // Guest size, one layer, all mipmap levels
+}
+
+std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
+    return GetInnerMipmapMemorySize(level, true, IsLayered(), false); // Host size of `level`
+}
+
+bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
+    if (std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
+                 view_params.component_type, view_params.type) !=
+        std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type)) {
+        return false; // Tiling, format and type have to match before targets are compared
+    }
+
+    const SurfaceTarget requested{view_params.target};
+    if (requested == target) {
+        return true;
+    }
+    const bool wants_2d{requested == SurfaceTarget::Texture2D};
+    const bool wants_2d_array{requested == SurfaceTarget::Texture2DArray};
+
+    // With different targets, only the layered targets below accept a view.
+    switch (target) {
+    case SurfaceTarget::Texture1DArray:
+        return requested == SurfaceTarget::Texture1D;
+    case SurfaceTarget::Texture2DArray:
+        return wants_2d;
+    case SurfaceTarget::TextureCubemap:
+        return wants_2d || wants_2d_array;
+    case SurfaceTarget::TextureCubeArray:
+        return wants_2d || wants_2d_array || requested == SurfaceTarget::TextureCubemap;
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        return false;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
+        return false;
+    }
+}
+
+bool SurfaceParams::IsPixelFormatZeta() const {
+    using Format = VideoCore::Surface::PixelFormat; // Zeta formats sit between the Max* markers
+    return Format::MaxColorFormat <= pixel_format && pixel_format < Format::MaxDepthStencilFormat;
+}
+
+void SurfaceParams::CalculateCachedValues() {
+    guest_size_in_bytes = GetInnerMemorySize(false, false, false);
+
+    // ASTC is decompressed in software and emulated as RGBA8 (four bytes per texel)
+    if (IsPixelFormatASTC(pixel_format)) {
+        host_size_in_bytes = static_cast<std::size_t>(width) * height * depth * 4; // No u32 wrap
+    } else {
+        host_size_in_bytes = GetInnerMemorySize(true, false, false);
+    }
+
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        num_layers = 1; // Non-layered targets always have a single layer
+        break;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        num_layers = depth; // Layered targets take their layer count from depth
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
+std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                                    bool uncompressed) const {
+    const bool swizzled{!as_host_size && is_tiled}; // Host sizes are always computed as linear
+    const u32 tile_w{GetDefaultBlockWidth(pixel_format)};
+    const u32 tile_h{GetDefaultBlockHeight(pixel_format)};
+    const u32 mip_w{GetMipmapSize(uncompressed, GetMipWidth(level), tile_w)};
+    const u32 mip_h{GetMipmapSize(uncompressed, GetMipHeight(level), tile_h)};
+    const u32 mip_d{layer_only ? 1U : GetMipDepth(level)}; // A lone layer counts as depth one
+    return Tegra::Texture::CalculateSize(swizzled, GetBytesPerPixel(pixel_format), mip_w, mip_h,
+                                         mip_d, GetMipBlockHeight(level), GetMipBlockDepth(level));
+}
+
+std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
+                                              bool uncompressed) const {
+    std::size_t total{};
+    for (u32 mip = 0; mip != num_levels; ++mip) {
+        total += GetInnerMipmapMemorySize(mip, as_host_size, layer_only, uncompressed);
+    }
+    if (as_host_size || !is_tiled) {
+        return total; // Only tiled guest memory gets aligned below
+    }
+    return Common::AlignUp(total, Tegra::Texture::GetGOBSize() * block_height * block_depth);
+}
+
+std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
+    // Maps the guest byte offset at which a view can start to its {layer, level} pair.
+    // Targets that are not handled below log an unimplemented message and yield an empty map.
+    std::map<u64, std::pair<u32, u32>> offsets;
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture3D:
+        // Non-layered targets expose one entry per mipmap level, always on layer zero.
+        for (u32 level = 0; level < num_levels; ++level) {
+            offsets.emplace(GetGuestMipmapLevelOffset(level), std::make_pair(0U, level));
+        }
+        break;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray: {
+        // Layered targets expose one entry per layer of every level, a layer size apart.
+        const std::size_t layer_stride{GetGuestLayerSize()};
+        for (u32 level = 0; level < num_levels; ++level) {
+            const std::size_t level_base{GetGuestMipmapLevelOffset(level)};
+            for (u32 layer = 0; layer < num_layers; ++layer) {
+                // An offset that is already present keeps its first {layer, level} pair.
+                offsets.emplace(level_base + layer_stride * layer, std::make_pair(layer, level));
+            }
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
+    }
+    return offsets;
+}
+
+bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    const bool fits_mip{IsDimensionValid(view_params, level) && IsDepthValid(view_params, level)};
+    return fits_mip && IsInBounds(view_params, layer, level); // Size first, then the ranges
+}
+
+bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
+    return GetMipWidth(level) == view_params.width && GetMipHeight(level) == view_params.height;
+}
+
+bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
+    // Depth is only compared for 3D views, which have to match the depth of the mipmap level.
+    // A view with any other target is accepted without looking at its depth.
+    const bool is_3d_view{view_params.target == SurfaceTarget::Texture3D};
+    return !is_3d_view || view_params.depth == GetMipDepth(level);
+}
+
+bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
+    const bool layers_fit{layer + view_params.num_layers <= num_layers};
+    return layers_fit && level + view_params.num_levels <= num_levels; // Both ranges must fit
+}
+
+std::size_t HasheableSurfaceParams::Hash() const {
+    const auto* const raw_bytes{reinterpret_cast<const char*>(this)}; // Hash covers every byte
+    return static_cast<std::size_t>(Common::CityHash64(raw_bytes, sizeof(*this)));
+}
+
+bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
+    const auto fields = [](const HasheableSurfaceParams& p) {
+        return std::tie(p.is_tiled, p.block_width, p.block_height, p.block_depth,
+                        p.tile_width_spacing, p.width, p.height, p.depth, p.pitch,
+                        p.unaligned_height, p.num_levels, p.pixel_format, p.component_type,
+                        p.type, p.target);
+    };
+    return fields(*this) == fields(rhs); // Equal only when every listed member matches
+}
+
+std::size_t ViewKey::Hash() const {
+    const auto* const raw_bytes{reinterpret_cast<const char*>(this)}; // Hash covers every byte
+    return static_cast<std::size_t>(Common::CityHash64(raw_bytes, sizeof(*this)));
+}
+
+bool ViewKey::operator==(const ViewKey& rhs) const {
+    const bool same_layers{base_layer == rhs.base_layer && num_layers == rhs.num_layers};
+    return same_layers && base_level == rhs.base_level && num_levels == rhs.num_levels;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h
new file mode 100644
index 000000000..041551691
--- /dev/null
+++ b/src/video_core/texture_cache.h
@@ -0,0 +1,586 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <list>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+
+#include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra::Texture {
+struct FullTextureInfo;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace VideoCommon {
+
+class HasheableSurfaceParams {
+public:
+    std::size_t Hash() const; // CityHash64 over the raw bytes of this object
+
+    bool operator==(const HasheableSurfaceParams& rhs) const; // Member-wise comparison
+
+protected:
+    // Avoid creation outside of a managed environment.
+    HasheableSurfaceParams() = default;
+
+    bool is_tiled; // NOTE(review): Hash() presumably also reads padding after this - confirm
+    u32 block_width;
+    u32 block_height;
+    u32 block_depth;
+    u32 tile_width_spacing;
+    u32 width;
+    u32 height;
+    u32 depth;
+    u32 pitch;
+    u32 unaligned_height;
+    u32 num_levels; // Number of mipmap levels
+    VideoCore::Surface::PixelFormat pixel_format;
+    VideoCore::Surface::ComponentType component_type;
+    VideoCore::Surface::SurfaceType type;
+    VideoCore::Surface::SurfaceTarget target;
+};
+
+class SurfaceParams final : public HasheableSurfaceParams {
+public:
+    /// Creates SurfaceParams from a texture configuration.
+    static SurfaceParams CreateForTexture(Core::System& system,
+                                          const Tegra::Texture::FullTextureInfo& config);
+
+    /// Creates SurfaceParams for a depth buffer configuration.
+    static SurfaceParams CreateForDepthBuffer(
+        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
+        u32 block_width, u32 block_height, u32 block_depth,
+        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+
+    /// Creates SurfaceParams from a framebuffer configuration.
+    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+
+    /// Creates SurfaceParams from a Fermi2D surface configuration.
+    static SurfaceParams CreateForFermiCopySurface(
+        const Tegra::Engines::Fermi2D::Regs::Surface& config);
+
+    bool IsTiled() const {
+        return is_tiled;
+    }
+
+    u32 GetBlockWidth() const {
+        return block_width;
+    }
+
+    u32 GetTileWidthSpacing() const {
+        return tile_width_spacing;
+    }
+
+    u32 GetWidth() const {
+        return width;
+    }
+
+    u32 GetHeight() const {
+        return height;
+    }
+
+    u32 GetDepth() const {
+        return depth;
+    }
+
+    u32 GetPitch() const {
+        return pitch;
+    }
+
+    u32 GetNumLevels() const {
+        return num_levels;
+    }
+
+    VideoCore::Surface::PixelFormat GetPixelFormat() const {
+        return pixel_format;
+    }
+
+    VideoCore::Surface::ComponentType GetComponentType() const {
+        return component_type;
+    }
+
+    VideoCore::Surface::SurfaceTarget GetTarget() const {
+        return target;
+    }
+
+    VideoCore::Surface::SurfaceType GetType() const {
+        return type;
+    }
+
+    std::size_t GetGuestSizeInBytes() const {
+        return guest_size_in_bytes;
+    }
+
+    std::size_t GetHostSizeInBytes() const {
+        return host_size_in_bytes;
+    }
+
+    u32 GetNumLayers() const {
+        return num_layers;
+    }
+
+    /// Returns the width of a given mipmap level (never less than one).
+    u32 GetMipWidth(u32 level) const;
+
+    /// Returns the height of a given mipmap level (never less than one).
+    u32 GetMipHeight(u32 level) const;
+
+    /// Returns the depth of a given mipmap level; layered surfaces keep their full depth.
+    u32 GetMipDepth(u32 level) const;
+
+    /// Returns true if these parameters are from a layered surface (array or cubemap target).
+    bool IsLayered() const;
+
+    /// Returns the block height of a given mipmap level.
+    u32 GetMipBlockHeight(u32 level) const;
+
+    /// Returns the block depth of a given mipmap level.
+    u32 GetMipBlockDepth(u32 level) const;
+
+    /// Returns the offset in bytes in guest memory of a given mipmap level.
+    std::size_t GetGuestMipmapLevelOffset(u32 level) const;
+
+    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
+    std::size_t GetHostMipmapLevelOffset(u32 level) const;
+
+    /// Returns the size of a layer in bytes in guest memory.
+    std::size_t GetGuestLayerSize() const;
+
+    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
+    std::size_t GetHostLayerSize(u32 level) const;
+
+    /// Returns true if another surface can be familiar with this. This is a loosely defined term
+    /// that reflects the possibility of these two surface parameters potentially being part of a
+    /// bigger superset.
+    bool IsFamiliar(const SurfaceParams& view_params) const;
+
+    /// Returns true if the pixel format is a depth and/or stencil format.
+    bool IsPixelFormatZeta() const;
+
+    /// Creates a map that redirects an address difference to a {layer, mipmap level} pair.
+    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;
+
+    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
+    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+private:
+    /// Calculates values that can be deduced from HasheableSurfaceParams.
+    void CalculateCachedValues();
+
+    /// Returns the size of a given mipmap level.
+    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
+                                         bool uncompressed) const;
+
+    /// Returns the size of all mipmap levels and aligns as needed.
+    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;
+
+    /// Returns true if the passed view width and height match the size of this params in a given
+    /// mipmap level.
+    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
+    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;
+
+    /// Returns true if the passed view layers and mipmap levels are in bounds.
+    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;
+
+    std::size_t guest_size_in_bytes; // Set by CalculateCachedValues
+    std::size_t host_size_in_bytes;  // Set by CalculateCachedValues
+    u32 num_layers;                  // Set by CalculateCachedValues: 1, or depth when layered
+};
+
+struct ViewKey {
+    std::size_t Hash() const; // CityHash64 over the raw bytes of the key
+
+    bool operator==(const ViewKey& rhs) const; // Compares the four members below
+
+    u32 base_layer{}; // First layer covered by the view
+    u32 num_layers{}; // Number of layers covered by the view
+    u32 base_level{}; // First mipmap level covered by the view
+    u32 num_levels{}; // Number of mipmap levels covered by the view
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::SurfaceParams> {
+    std::size_t operator()(const VideoCommon::SurfaceParams& params) const noexcept {
+        return params.Hash(); // Forwards to the hash of the hasheable base members
+    }
+};
+
+template <>
+struct hash<VideoCommon::ViewKey> {
+    std::size_t operator()(const VideoCommon::ViewKey& view_key) const noexcept {
+        return view_key.Hash(); // Forwards to ViewKey::Hash
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon {
+
+template <typename TView, typename TExecutionContext>
+class SurfaceBase {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+
+public:
+    virtual void LoadBuffer() = 0;
+
+    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;
+
+    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
+
+    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
+        // A view cannot start before its surface and has to belong to the same family.
+        const bool can_be_view{view_addr >= cpu_addr && params.IsFamiliar(view_params)};
+        if (!can_be_view) {
+            return nullptr;
+        }
+
+        // Only the offsets listed in the view offset map can be the start of a view.
+        const u64 offset{static_cast<u64>(view_addr - cpu_addr)};
+        const auto entry{view_offset_map.find(offset)};
+        if (entry == view_offset_map.end()) {
+            return nullptr;
+        }
+
+        const auto [layer, level] = entry->second;
+        if (!params.IsViewValid(view_params, layer, level)) {
+            return nullptr;
+        }
+        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
+    }
+
+    VAddr GetCpuAddr() const {
+        ASSERT(is_registered); // The address is only meaningful while registered
+        return cpu_addr;
+    }
+
+    u8* GetHostPtr() const {
+        ASSERT(is_registered); // The pointer is only meaningful while registered
+        return host_ptr;
+    }
+
+    CacheAddr GetCacheAddr() const {
+        ASSERT(is_registered); // The address is only meaningful while registered
+        return cache_addr;
+    }
+
+    std::size_t GetSizeInBytes() const {
+        return params.GetGuestSizeInBytes(); // Size of the surface in guest memory
+    }
+
+    void MarkAsModified(bool is_modified_) {
+        is_modified = is_modified_;
+    }
+
+    const SurfaceParams& GetSurfaceParams() const {
+        return params;
+    }
+
+    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
+        TView* const found_view{TryGetView(view_addr, view_params)};
+        ASSERT(found_view != nullptr); // Callers of this overload expect the view to exist
+        return found_view;
+    }
+
+    void Register(VAddr cpu_addr_, u8* host_ptr_) {
+        ASSERT(!is_registered);
+        cpu_addr = cpu_addr_;
+        host_ptr = host_ptr_;
+        cache_addr = ToCacheAddr(host_ptr_);
+        is_registered = true; // Flagged once every address has been stored
+    }
+
+    void Register(VAddr cpu_addr_) {
+        Register(cpu_addr_, Memory::GetPointer(cpu_addr_)); // Host pointer looked up from memory
+    }
+
+    void Unregister() {
+        ASSERT(is_registered);
+        is_registered = false; // The stored addresses are kept but no longer readable
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+protected:
+    explicit SurfaceBase(const SurfaceParams& params)
+        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}
+
+    ~SurfaceBase() = default;
+
+    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    const SurfaceParams params;
+
+private:
+    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
+        const ViewKey view_key{base_layer, num_layers, base_level, num_levels};
+        const auto [slot, inserted] = views.try_emplace(view_key);
+        if (inserted) {
+            // First request for this range: build the view and keep it for later lookups.
+            slot->second = CreateView(view_key);
+        }
+        return slot->second.get();
+    }
+
+    const std::map<u64, std::pair<u32, u32>> view_offset_map;
+
+    VAddr cpu_addr{};
+    u8* host_ptr{};
+    CacheAddr cache_addr{};
+    bool is_modified{};
+    bool is_registered{};
+    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
+};
+
+template <typename TSurface, typename TView, typename TExecutionContext>
+class TextureCache {
+    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
+    using ResultType = std::tuple<TView*, TExecutionContext>;
+    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
+    using IntervalType = typename IntervalMap::interval_type;
+
+public:
+    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+        // GetSurfacesInRegion reports every overlapping surface once, so each is dropped once.
+        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
+            Unregister(surface);
+        }
+    }
+
+    ResultType GetTextureSurface(TExecutionContext exctx,
+                                 const Tegra::Texture::FullTextureInfo& config) {
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx}; // Address could not be translated: no view is returned
+        }
+        const auto params{SurfaceParams::CreateForTexture(system, config)};
+        return GetSurfaceView(exctx, *cpu_addr, params, true);
+    }
+
+    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (!regs.zeta.Address() || !regs.zeta_enable) {
+            return {{}, exctx}; // No depth buffer address or depth buffer disabled
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
+            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
+            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
+            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
+    }
+
+    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
+                                     bool preserve_contents) {
+        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
+
+        const auto& regs{system.GPU().Maxwell3D().regs};
+        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
+            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+            return {{}, exctx}; // Render target is not in use
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(
+            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
+        if (!cpu_addr) {
+            return {{}, exctx};
+        }
+
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+                              preserve_contents);
+    }
+
+    ResultType GetFermiSurface(TExecutionContext exctx,
+                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
+        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
+        ASSERT(cpu_addr); // Unlike the other getters, a failed translation is not tolerated here
+        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
+                              true);
+    }
+
+    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
+        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
+        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
+    }
+
+protected:
+    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+        : system{system}, rasterizer{rasterizer} {}
+
+    ~TextureCache() = default;
+
+    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                                             const SurfaceParams& params, bool preserve_contents,
+                                             const std::vector<TSurface*>& overlaps) = 0;
+
+    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;
+
+    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
+        surface->Register(cpu_addr, host_ptr);
+        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
+    }
+
+    void Unregister(TSurface* surface) {
+        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
+        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
+        surface->Unregister(); // Last: the calls above read the addresses of the surface
+    }
+
+    TSurface* GetUncachedSurface(const SurfaceParams& params) {
+        if (TSurface* surface = TryGetReservedSurface(params); surface)
+            return surface;
+        // No reserved surface available, create a new one and reserve it
+        auto new_surface{CreateSurface(params)};
+        TSurface* surface{new_surface.get()};
+        ReserveSurface(params, std::move(new_surface));
+        return surface;
+    }
+
+    Core::System& system;
+
+private:
+    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
+                              bool preserve_contents) {
+        const auto host_ptr{Memory::GetPointer(cpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
+        if (overlaps.empty()) {
+            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+        }
+
+        if (overlaps.size() == 1) {
+            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
+                return {view, exctx};
+        }
+
+        TView* fast_view;
+        std::tie(fast_view, exctx) =
+            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);
+
+        for (TSurface* surface : overlaps) {
+            if (!fast_view) {
+                // Flush even when we don't care about the contents, to preserve memory not written
+                // by the new surface.
+                exctx = surface->FlushBuffer(exctx);
+            }
+            Unregister(surface); // Safe: the overlap list holds every surface only once
+        }
+
+        if (fast_view) {
+            return {fast_view, exctx};
+        }
+
+        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
+    }
+
+    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
+                               const SurfaceParams& params, bool preserve_contents) {
+        TSurface* new_surface{GetUncachedSurface(params)};
+        Register(new_surface, cpu_addr, host_ptr);
+        if (preserve_contents) {
+            exctx = LoadSurface(exctx, new_surface);
+        }
+        return {new_surface->GetView(cpu_addr, params), exctx};
+    }
+
+    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
+        surface->LoadBuffer();
+        exctx = surface->UploadTexture(exctx);
+        surface->MarkAsModified(false);
+        return exctx;
+    }
+
+    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
+        std::vector<TSurface*> surfaces;
+        if (size == 0) {
+            return surfaces;
+        }
+        const IntervalType interval{cache_addr, cache_addr + size};
+        // Several segments of the interval map can list the same surface; report it only once.
+        std::set<TSurface*> seen;
+        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
+            if (TSurface* const surface{*pair.second.begin()}; seen.insert(surface).second) {
+                surfaces.push_back(surface);
+            }
+        }
+        return surfaces;
+    }
+
+    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
+        surface_reserve[params].push_back(std::move(surface));
+    }
+
+    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
+        auto search{surface_reserve.find(params)};
+        if (search == surface_reserve.end()) {
+            return {};
+        }
+        for (auto& surface : search->second) {
+            if (!surface->IsRegistered()) {
+                return surface.get(); // Reuse the first reserved surface that is not in use
+            }
+        }
+        return {};
+    }
+
+    IntervalType GetSurfaceInterval(TSurface* surface) const {
+        return IntervalType::right_open(surface->GetCacheAddr(),
+                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    IntervalMap registered_surfaces;
+
+    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
+    /// previously been used. This is to prevent surfaces from being constantly created and
+    /// destroyed when used with different surface parameters.
+    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index 5e439f036..82050bd51 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "video_core/surface.h"
 #include "video_core/textures/astc.h"
 #include "video_core/textures/convert.h"
 
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
index 07cd8b5da..12542e71c 100644
--- a/src/video_core/textures/convert.h
+++ b/src/video_core/textures/convert.h
@@ -5,7 +5,10 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+enum class PixelFormat;
+}
 
 namespace Tegra::Texture {
 
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 93ecc6e31..bea0d5bc2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -7,9 +7,7 @@
 #include <array>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra::Texture {