6 files changed, 453 insertions, 48 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..34053e393 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -26,8 +26,8 @@ void Fermi2D::WriteReg(u32 method, u32 value) {
 }
 
 void Fermi2D::HandleSurfaceCopy() {
-    NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
-                  static_cast<u32>(regs.operation));
+    LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
+                static_cast<u32>(regs.operation));
 
     const GPUVAddr source = regs.src.Address();
     const GPUVAddr dest = regs.dst.Address();
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
 
     if (regs.src.linear == regs.dst.linear) {
         // If the input layout and the output layout are the same, just perform a raw copy.
+        ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
         Memory::CopyBlock(dest_cpu, source_cpu,
                           src_bytes_per_pixel * regs.dst.width * regs.dst.height);
         return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 86e9dc998..3bca16364 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -126,6 +126,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         DrawArrays();
         break;
     }
+    case MAXWELL3D_REG_INDEX(clear_buffers): {
+        ProcessClearBuffers();
+        break;
+    }
     case MAXWELL3D_REG_INDEX(query.query_get): {
         ProcessQueryGet();
         break;
@@ -207,8 +211,8 @@ void Maxwell3D::ProcessQueryGet() {
 }
 
 void Maxwell3D::DrawArrays() {
-    NGLOG_DEBUG(HW_GPU, "called, topology={}, count={}",
-                static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count);
+    LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+              regs.vertex_buffer.count);
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
@@ -328,8 +332,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
-        tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) /
-                         sizeof(Texture::TextureHandle);
+        tex_info.index =
+            static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
+            sizeof(Texture::TextureHandle);
 
         // Load the TIC data.
         if (tex_handle.tic_id != 0) {
@@ -414,5 +419,13 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
     UNREACHABLE();
 }
 
+void Maxwell3D::ProcessClearBuffers() { // Runs when the clear_buffers register is written.
+    ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && // Only clears whose R, G, B and A
+           regs.clear_buffers.R == regs.clear_buffers.B && // bits all carry the same value
+           regs.clear_buffers.R == regs.clear_buffers.A);  // pass this assertion.
+
+    VideoCore::g_renderer->Rasterizer()->Clear(); // The clear itself is done by the rasterizer.
+}
+
 } // namespace Engines
 } // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 2dc251205..5a7cf0107 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -280,6 +280,46 @@ public:
             UnsignedInt = 0x2,
         };
 
+        enum class ComparisonOp : u32 { // Comparison function encodings; see depth_test_func.
+            // These values are used by Nouveau and most games; they correspond to the OpenGL token
+            // values for these operations.
+            Never = 0x200,        // GL_NEVER
+            Less = 0x201,         // GL_LESS
+            Equal = 0x202,        // GL_EQUAL
+            LessEqual = 0x203,    // GL_LEQUAL
+            Greater = 0x204,      // GL_GREATER
+            NotEqual = 0x205,     // GL_NOTEQUAL
+            GreaterEqual = 0x206, // GL_GEQUAL
+            Always = 0x207,       // GL_ALWAYS
+
+            // These values are used by some games; they seem to be NV04 values.
+            NeverOld = 1,
+            LessOld = 2,
+            EqualOld = 3,
+            LessEqualOld = 4,
+            GreaterOld = 5,
+            NotEqualOld = 6,
+            GreaterEqualOld = 7,
+            AlwaysOld = 8,
+        };
+
+        struct Cull { // Three consecutive words: enabled, front_face, cull_face.
+            enum class FrontFace : u32 {
+                ClockWise = 0x0900,        // Same numeric value as OpenGL's GL_CW.
+                CounterClockWise = 0x0901, // Same numeric value as OpenGL's GL_CCW.
+            };
+
+            enum class CullFace : u32 {
+                Front = 0x0404,        // Same numeric value as OpenGL's GL_FRONT.
+                Back = 0x0405,         // Same numeric value as OpenGL's GL_BACK.
+                FrontAndBack = 0x0408, // Same numeric value as OpenGL's GL_FRONT_AND_BACK.
+            };
+
+            u32 enabled;
+            FrontFace front_face;
+            CullFace cull_face;
+        };
+
         struct Blend {
             enum class Equation : u32 {
                 Add = 1,
@@ -321,6 +361,24 @@ public:
             INSERT_PADDING_WORDS(1);
         };
 
+        struct RenderTargetConfig { // Element type of the rt[NumRenderTargets] register array.
+            u32 address_high;
+            u32 address_low;
+            u32 width;
+            u32 height;
+            Tegra::RenderTargetFormat format;
+            u32 block_dimensions;
+            u32 array_mode;
+            u32 layer_stride;
+            u32 base_layer;
+            INSERT_PADDING_WORDS(7); // Presumably pads each entry to 16 words - TODO confirm.
+
+            GPUVAddr Address() const { // address_high forms bits 32 and up, address_low the rest.
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        };
+
         union {
             struct {
                 INSERT_PADDING_WORDS(0x45);
@@ -333,23 +391,7 @@ public:
 
                 INSERT_PADDING_WORDS(0x1B8);
 
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 width;
-                    u32 height;
-                    Tegra::RenderTargetFormat format;
-                    u32 block_dimensions;
-                    u32 array_mode;
-                    u32 layer_stride;
-                    u32 base_layer;
-                    INSERT_PADDING_WORDS(7);
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } rt[NumRenderTargets];
+                RenderTargetConfig rt[NumRenderTargets];
 
                 struct {
                     f32 scale_x;
@@ -406,12 +448,17 @@ public:
                     u32 count;
                 } vertex_buffer;
 
-                INSERT_PADDING_WORDS(0x99);
+                INSERT_PADDING_WORDS(1);
+
+                float clear_color[4];
+                float clear_depth;
+
+                INSERT_PADDING_WORDS(0x93);
 
                 struct {
                     u32 address_high;
                     u32 address_low;
-                    u32 format;
+                    Tegra::DepthFormat format;
                     u32 block_dimensions;
                     u32 layer_stride;
 
@@ -433,11 +480,23 @@ public:
                     };
                 } rt_control;
 
-                INSERT_PADDING_WORDS(0x31);
+                INSERT_PADDING_WORDS(0x2B);
+
+                u32 depth_test_enable;
+
+                INSERT_PADDING_WORDS(0x5);
 
                 u32 independent_blend_enable;
 
-                INSERT_PADDING_WORDS(0x15);
+                u32 depth_write_enabled;
+
+                INSERT_PADDING_WORDS(0x7);
+
+                u32 d3d_cull_mode;
+
+                ComparisonOp depth_test_func;
+
+                INSERT_PADDING_WORDS(0xB);
 
                 struct {
                     u32 separate_alpha;
@@ -453,7 +512,17 @@ public:
                     u32 enable[NumRenderTargets];
                 } blend;
 
-                INSERT_PADDING_WORDS(0x77);
+                INSERT_PADDING_WORDS(0xB);
+
+                union {
+                    BitField<4, 1, u32> triangle_rast_flip;
+                } screen_y_control;
+
+                INSERT_PADDING_WORDS(0x21);
+
+                u32 vb_element_base;
+
+                INSERT_PADDING_WORDS(0x49);
 
                 struct {
                     u32 tsc_address_high;
@@ -479,7 +548,12 @@ public:
                     }
                 } tic;
 
-                INSERT_PADDING_WORDS(0x22);
+                INSERT_PADDING_WORDS(0x21);
+
+                union {
+                    BitField<2, 1, u32> coord_origin;
+                    BitField<3, 10, u32> enable;
+                } point_coord_replace;
 
                 struct {
                     u32 code_address_high;
@@ -534,7 +608,27 @@ public:
                     }
                 } index_array;
 
-                INSERT_PADDING_WORDS(0xC7);
+                INSERT_PADDING_WORDS(0x7);
+
+                INSERT_PADDING_WORDS(0x46);
+
+                Cull cull;
+
+                INSERT_PADDING_WORDS(0x2B);
+
+                union {
+                    u32 raw;
+                    BitField<0, 1, u32> Z;
+                    BitField<1, 1, u32> S;
+                    BitField<2, 1, u32> R;
+                    BitField<3, 1, u32> G;
+                    BitField<4, 1, u32> B;
+                    BitField<5, 1, u32> A;
+                    BitField<6, 4, u32> RT;
+                    BitField<10, 11, u32> layer;
+                } clear_buffers;
+
+                INSERT_PADDING_WORDS(0x4B);
 
                 struct {
                     u32 query_address_high;
@@ -716,6 +810,9 @@ private:
     /// Handles writes to the macro uploading registers.
     void ProcessMacroUpload(u32 data);
 
+    /// Handles a write to the CLEAR_BUFFERS register.
+    void ProcessClearBuffers();
+
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
@@ -738,16 +835,27 @@ ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(clear_color[0], 0x360);
+ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
 ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
+ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
+ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
+ASSERT_REG_POSITION(depth_test_func, 0x4C3);
 ASSERT_REG_POSITION(blend, 0x4CF);
+ASSERT_REG_POSITION(screen_y_control, 0x4EB);
+ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
 ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(cull, 0x646);
+ASSERT_REG_POSITION(clear_buffers, 0x674);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
 ASSERT_REG_POSITION(independent_blend, 0x780);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..6e740713f
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,85 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/memory.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra {
+namespace Engines {
+
+MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+
+// Stores value in the register selected by method, then starts a copy if that register is exec.
+void MaxwellDMA::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid MaxwellDMA register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+#define MAXWELLDMA_REG_INDEX(field_name)                                                           \
+    (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
+
+    switch (method) {
+    case MAXWELLDMA_REG_INDEX(exec): {
+        HandleCopy();
+        break;
+    }
+    }
+
+#undef MAXWELLDMA_REG_INDEX
+}
+
+// Performs the copy described by the current register state. Only the configuration accepted by
+// the assertions below is implemented.
+void MaxwellDMA::HandleCopy() {
+    LOG_WARNING(HW_GPU, "Requested a DMA copy");
+
+    const GPUVAddr source = regs.src_address.Address();
+    const GPUVAddr dest = regs.dst_address.Address();
+
+    // Both translations are dereferenced below, so make sure they hold a value first
+    // (presumably they are empty when the GPU address is not mapped).
+    const auto source_cpu_addr = memory_manager.GpuToCpuAddress(source);
+    const auto dest_cpu_addr = memory_manager.GpuToCpuAddress(dest);
+    ASSERT_MSG(source_cpu_addr && dest_cpu_addr, "DMA copy references an unmapped GPU address");
+
+    const VAddr source_cpu = *source_cpu_addr;
+    const VAddr dest_cpu = *dest_cpu_addr;
+
+    // TODO(Subv): Perform more research and implement all features of this engine.
+    ASSERT(regs.exec.enable_swizzle == 0);
+    ASSERT(regs.exec.enable_2d == 1);
+    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
+    ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
+    ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
+    ASSERT(regs.src_params.pos_x == 0);
+    ASSERT(regs.src_params.pos_y == 0);
+    ASSERT(regs.dst_params.pos_x == 0);
+    ASSERT(regs.dst_params.pos_y == 0);
+
+    if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
+        // Both sides use the same layout, so copy the data as-is. The size is widened before the
+        // multiplication so that the product cannot wrap around at 32 bits.
+        const size_t copy_size = static_cast<size_t>(regs.x_count) * regs.y_count;
+        Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
+        return;
+    }
+
+    u8* src_buffer = Memory::GetPointer(source_cpu);
+    u8* dst_buffer = Memory::GetPointer(dest_cpu);
+
+    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
+                                  dst_buffer, true, regs.src_params.BlockHeight());
+    } else {
+        // If the input is linear and the output is tiled, swizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
+                                  src_buffer, false, regs.dst_params.BlockHeight());
+    }
+}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,167 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// Register state and copy handling for the Maxwell DMA engine.
+class MaxwellDMA final {
+public:
+    explicit MaxwellDMA(MemoryManager& memory_manager);
+    ~MaxwellDMA() = default;
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+
+    /// Register layout of the engine, addressable by field or by method index via reg_array.
+    struct Regs {
+        /// Number of 32-bit registers covered by reg_array.
+        static constexpr size_t NUM_REGS = 0x1D6;
+
+        /// Surface parameter block, instantiated as dst_params and src_params.
+        struct Parameters {
+            union {
+                BitField<0, 4, u32> block_depth;
+                BitField<4, 4, u32> block_height;
+                BitField<8, 4, u32> block_width;
+            };
+            u32 size_x;
+            u32 size_y;
+            u32 size_z;
+            u32 pos_z;
+            union {
+                BitField<0, 16, u32> pos_x;
+                BitField<16, 16, u32> pos_y;
+            };
+
+            /// Returns the block height, which the register stores as a power-of-two exponent.
+            u32 BlockHeight() const {
+                return 1 << block_height;
+            }
+        };
+
+        static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+
+        enum class CopyMode : u32 {
+            None = 0,
+            Unk1 = 1,
+            Unk2 = 2,
+        };
+
+        enum class QueryMode : u32 {
+            None = 0,
+            Short = 1,
+            Long = 2,
+        };
+
+        enum class QueryIntr : u32 {
+            None = 0,
+            Block = 1,
+            NonBlock = 2,
+        };
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0xC0);
+
+                /// Copy control word; MaxwellDMA::WriteReg starts the copy when it is written.
+                struct {
+                    union {
+                        BitField<0, 2, CopyMode> copy_mode;
+                        BitField<2, 1, u32> flush;
+
+                        BitField<3, 2, QueryMode> query_mode;
+                        BitField<5, 2, QueryIntr> query_intr;
+
+                        BitField<7, 1, u32> is_src_linear;
+                        BitField<8, 1, u32> is_dst_linear;
+
+                        BitField<9, 1, u32> enable_2d;
+                        BitField<10, 1, u32> enable_swizzle;
+                    };
+                } exec;
+
+                INSERT_PADDING_WORDS(0x3F);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } src_address;
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } dst_address;
+
+                u32 src_pitch;
+                u32 dst_pitch;
+                u32 x_count;
+                u32 y_count;
+
+                INSERT_PADDING_WORDS(0xBB);
+
+                Parameters dst_params;
+
+                INSERT_PADDING_WORDS(1);
+
+                Parameters src_params;
+
+                // src_params ends at word 0x1D0; pad up to NUM_REGS (0x1D6) so this layout is
+                // exactly as large as reg_array.
+                INSERT_PADDING_WORDS(0x6);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    MemoryManager& memory_manager;
+
+private:
+    /// Performs the copy from the source buffer to the destination buffer as configured in the
+    /// registers.
+    void HandleCopy();
+};
+
+static_assert(sizeof(MaxwellDMA::Regs) == MaxwellDMA::Regs::NUM_REGS * sizeof(u32),
+              "MaxwellDMA Regs has wrong size");
+
+// Checks at compile time that a register field sits at the given word offset.
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4,                          \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(exec, 0xC0);
+ASSERT_REG_POSITION(src_address, 0x100);
+ASSERT_REG_POSITION(dst_address, 0x102);
+ASSERT_REG_POSITION(src_pitch, 0x104);
+ASSERT_REG_POSITION(dst_pitch, 0x105);
+ASSERT_REG_POSITION(x_count, 0x106);
+ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(dst_params, 0x1C3);
+ASSERT_REG_POSITION(src_params, 0x1CA);
+
+#undef ASSERT_REG_POSITION
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ec8dbd370..2bc1782ad 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
     GreaterThan = 4,
     NotEqual = 5,
     GreaterEqual = 6,
+    NotEqualWithNan = 13,
     // TODO(Subv): Other condition types
 };
 
@@ -165,7 +166,7 @@ enum class SubOp : u64 {
     Lg2 = 0x3,
     Rcp = 0x4,
     Rsq = 0x5,
-    Min = 0x8,
+    Sqrt = 0x8,
 };
 
 enum class F2iRoundingOp : u64 {
@@ -193,6 +194,13 @@ enum class UniformType : u64 {
     Double = 5,
 };
 
+enum class IMinMaxExchange : u64 { // Values of the 2-bit imnmx.exchange instruction field.
+    None = 0,
+    XLo = 1,
+    XMed = 2,
+    XHi = 3,
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -209,20 +217,19 @@ union Instruction {
     } pred;
     BitField<19, 1, u64> negate_pred;
     BitField<20, 8, Register> gpr20;
-    BitField<20, 7, SubOp> sub_op;
+    BitField<20, 4, SubOp> sub_op;
     BitField<28, 8, Register> gpr28;
     BitField<39, 8, Register> gpr39;
     BitField<48, 16, u64> opcode;
-    BitField<50, 1, u64> saturate_a;
 
     union {
         BitField<20, 19, u64> imm20_19;
-        BitField<20, 32, u64> imm20_32;
+        BitField<20, 32, s64> imm20_32;
         BitField<45, 1, u64> negate_b;
         BitField<46, 1, u64> abs_a;
         BitField<48, 1, u64> negate_a;
         BitField<49, 1, u64> abs_b;
-        BitField<50, 1, u64> abs_d;
+        BitField<50, 1, u64> saturate_d;
         BitField<56, 1, u64> negate_imm;
 
         union {
@@ -231,10 +238,18 @@ union Instruction {
         } fmnmx;
 
         union {
+            BitField<39, 1, u64> invert_a;
+            BitField<40, 1, u64> invert_b;
+            BitField<41, 2, LogicOperation> operation;
+            BitField<44, 2, u64> unk44;
+            BitField<48, 3, Pred> pred48;
+        } lop;
+
+        union {
             BitField<53, 2, LogicOperation> operation;
             BitField<55, 1, u64> invert_a;
             BitField<56, 1, u64> invert_b;
-        } lop;
+        } lop32i;
 
         float GetImm20_19() const {
             float result{};
@@ -247,7 +262,7 @@ union Instruction {
 
         float GetImm20_32() const {
             float result{};
-            u32 imm{static_cast<u32>(imm20_32)};
+            s32 imm{static_cast<s32>(imm20_32)};
             std::memcpy(&result, &imm, sizeof(imm));
             return result;
         }
@@ -271,6 +286,18 @@ union Instruction {
     } alu_integer;
 
     union {
+        BitField<39, 3, u64> pred;
+        BitField<42, 1, u64> negate_pred;
+        BitField<43, 2, IMinMaxExchange> exchange;
+        BitField<48, 1, u64> is_signed;
+    } imnmx;
+
+    union {
+        BitField<54, 1, u64> saturate;
+        BitField<56, 1, u64> negate_a;
+    } iadd32i;
+
+    union {
         BitField<20, 8, u64> shift_position;
         BitField<28, 8, u64> shift_length;
         BitField<48, 1, u64> negate_b;
@@ -316,6 +343,19 @@ union Instruction {
     } isetp;
 
     union {
+        BitField<0, 3, u64> pred0;
+        BitField<3, 3, u64> pred3;
+        BitField<12, 3, u64> pred12;
+        BitField<15, 1, u64> neg_pred12;
+        BitField<24, 2, PredOperation> cond;
+        BitField<29, 3, u64> pred29;
+        BitField<32, 1, u64> neg_pred29;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39;
+        BitField<45, 2, PredOperation> op;
+    } psetp;
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
@@ -339,7 +379,8 @@ union Instruction {
     } iset;
 
     union {
-        BitField<10, 2, Register::Size> size;
+        BitField<8, 2, Register::Size> dest_size;
+        BitField<10, 2, Register::Size> src_size;
         BitField<12, 1, u64> is_output_signed;
         BitField<13, 1, u64> is_input_signed;
         BitField<41, 2, u64> selector;
@@ -359,7 +400,7 @@ union Instruction {
         BitField<31, 4, u64> component_mask;
 
         bool IsComponentEnabled(size_t component) const {
-            return ((1 << component) & component_mask) != 0;
+            return ((1ull << component) & component_mask) != 0;
         }
     } tex;
 
@@ -378,7 +419,7 @@ union Instruction {
 
             ASSERT(component_mask_selector < mask.size());
 
-            return ((1 << component) & mask[component_mask_selector]) != 0;
+            return ((1ull << component) & mask[component_mask_selector]) != 0;
         }
     } texs;
 
@@ -424,6 +465,8 @@ public:
     enum class Id {
         KIL,
         SSY,
+        SYNC,
+        DEPBAR,
         BFE_C,
         BFE_R,
         BFE_IMM,
@@ -451,6 +494,7 @@ public:
         IADD_C,
         IADD_R,
         IADD_IMM,
+        IADD32I,
         ISCADD_C, // Scale and Add
         ISCADD_R,
         ISCADD_IMM,
@@ -470,6 +514,9 @@ public:
         I2I_C,
         I2I_R,
         I2I_IMM,
+        LOP_C,
+        LOP_R,
+        LOP_IMM,
         LOP32I,
         MOV_C,
         MOV_R,
@@ -509,12 +556,14 @@ public:
     enum class Type {
         Trivial,
         Arithmetic,
+        ArithmeticImmediate,
         ArithmeticInteger,
+        ArithmeticIntegerImmediate,
         Bfe,
-        Logic,
         Shift,
         Ffma,
         Flow,
+        Synch,
         Memory,
         FloatSet,
         FloatSetPredicate,
@@ -619,10 +668,12 @@ private:
             INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
             INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
             INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
+            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
+            INST("1111000011111---", Id::SYNC, Type::Synch, "SYNC"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
             INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
             INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
-            INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
+            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
             INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
             INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
@@ -638,10 +689,11 @@ private:
             INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
             INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
             INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
-            INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+            INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
             INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
             INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
             INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
+            INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
             INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
             INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
             INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
@@ -658,17 +710,20 @@ private:
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-            INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
+            INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
             INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
-            INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "FMNMX_IMM"),
-            INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "FMNMX_IMM"),
-            INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
+            INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
+            INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
+            INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
             INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
             INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
             INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
-            INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+            INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
+            INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
+            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
             INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
             INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
             INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),