summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 5
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 21
-rw-r--r-- src/video_core/engines/maxwell_3d.h 156
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 73
-rw-r--r-- src/video_core/engines/maxwell_dma.h 155
-rw-r--r-- src/video_core/engines/shader_bytecode.h 91
6 files changed, 453 insertions, 48 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..34053e393 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -26,8 +26,8 @@ void Fermi2D::WriteReg(u32 method, u32 value) {
}
void Fermi2D::HandleSurfaceCopy() {
- NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
- static_cast<u32>(regs.operation));
+ LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
+ static_cast<u32>(regs.operation));
const GPUVAddr source = regs.src.Address();
const GPUVAddr dest = regs.dst.Address();
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
if (regs.src.linear == regs.dst.linear) {
// If the input layout and the output layout are the same, just perform a raw copy.
+ ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
Memory::CopyBlock(dest_cpu, source_cpu,
src_bytes_per_pixel * regs.dst.width * regs.dst.height);
return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 86e9dc998..3bca16364 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -126,6 +126,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
DrawArrays();
break;
}
+ case MAXWELL3D_REG_INDEX(clear_buffers): {
+ ProcessClearBuffers();
+ break;
+ }
case MAXWELL3D_REG_INDEX(query.query_get): {
ProcessQueryGet();
break;
@@ -207,8 +211,8 @@ void Maxwell3D::ProcessQueryGet() {
}
void Maxwell3D::DrawArrays() {
- NGLOG_DEBUG(HW_GPU, "called, topology={}, count={}",
- static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count);
+ LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
@@ -328,8 +332,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
- tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) /
- sizeof(Texture::TextureHandle);
+ tex_info.index =
+ static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
+ sizeof(Texture::TextureHandle);
// Load the TIC data.
if (tex_handle.tic_id != 0) {
@@ -414,5 +419,13 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
UNREACHABLE();
}
+void Maxwell3D::ProcessClearBuffers() { // Called from WriteReg on a clear_buffers write.
+ ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
+ regs.clear_buffers.R == regs.clear_buffers.B &&
+ regs.clear_buffers.R == regs.clear_buffers.A); // The R, G, B and A bits must all be equal.
+ // The register bits are only asserted on here; Clear() is invoked without arguments.
+ VideoCore::g_renderer->Rasterizer()->Clear();
+}
+
} // namespace Engines
} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 2dc251205..5a7cf0107 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -280,6 +280,46 @@ public:
UnsignedInt = 0x2,
};
+ enum class ComparisonOp : u32 { // Type of the depth_test_func register.
+ // These values are used by Nouveau and most games; they correspond to the OpenGL token
+ // values for these operations.
+ Never = 0x200,
+ Less = 0x201,
+ Equal = 0x202,
+ LessEqual = 0x203,
+ Greater = 0x204,
+ NotEqual = 0x205,
+ GreaterEqual = 0x206,
+ Always = 0x207,
+ // Each *Old value below equals its counterpart above minus 0x1FF.
+ // These values are used by some games; they seem to be NV04 values.
+ NeverOld = 1,
+ LessOld = 2,
+ EqualOld = 3,
+ LessEqualOld = 4,
+ GreaterOld = 5,
+ NotEqualOld = 6,
+ GreaterEqualOld = 7,
+ AlwaysOld = 8,
+ };
+
+ struct Cull { // Three consecutive u32-sized words; instantiated as the `cull` register group.
+ enum class FrontFace : u32 { // NOTE(review): values look like OpenGL GL_CW/GL_CCW - confirm.
+ ClockWise = 0x0900,
+ CounterClockWise = 0x0901,
+ };
+
+ enum class CullFace : u32 { // NOTE(review): values look like OpenGL GL_FRONT/GL_BACK - confirm.
+ Front = 0x0404,
+ Back = 0x0405,
+ FrontAndBack = 0x0408,
+ };
+ // Register order: enabled, then front_face, then cull_face.
+ u32 enabled;
+ FrontFace front_face;
+ CullFace cull_face;
+ };
+
struct Blend {
enum class Equation : u32 {
Add = 1,
@@ -321,6 +361,24 @@ public:
INSERT_PADDING_WORDS(1);
};
+ struct RenderTargetConfig { // Element type of the rt[NumRenderTargets] register array.
+ u32 address_high;
+ u32 address_low;
+ u32 width;
+ u32 height;
+ Tegra::RenderTargetFormat format;
+ u32 block_dimensions;
+ u32 array_mode;
+ u32 layer_stride;
+ u32 base_layer;
+ INSERT_PADDING_WORDS(7);
+ // Combines address_high (upper 32 bits) and address_low into one GPU virtual address.
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ };
+
union {
struct {
INSERT_PADDING_WORDS(0x45);
@@ -333,23 +391,7 @@ public:
INSERT_PADDING_WORDS(0x1B8);
- struct {
- u32 address_high;
- u32 address_low;
- u32 width;
- u32 height;
- Tegra::RenderTargetFormat format;
- u32 block_dimensions;
- u32 array_mode;
- u32 layer_stride;
- u32 base_layer;
- INSERT_PADDING_WORDS(7);
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } rt[NumRenderTargets];
+ RenderTargetConfig rt[NumRenderTargets];
struct {
f32 scale_x;
@@ -406,12 +448,17 @@ public:
u32 count;
} vertex_buffer;
- INSERT_PADDING_WORDS(0x99);
+ INSERT_PADDING_WORDS(1);
+
+ float clear_color[4];
+ float clear_depth;
+
+ INSERT_PADDING_WORDS(0x93);
struct {
u32 address_high;
u32 address_low;
- u32 format;
+ Tegra::DepthFormat format;
u32 block_dimensions;
u32 layer_stride;
@@ -433,11 +480,23 @@ public:
};
} rt_control;
- INSERT_PADDING_WORDS(0x31);
+ INSERT_PADDING_WORDS(0x2B);
+
+ u32 depth_test_enable;
+
+ INSERT_PADDING_WORDS(0x5);
u32 independent_blend_enable;
- INSERT_PADDING_WORDS(0x15);
+ u32 depth_write_enabled;
+
+ INSERT_PADDING_WORDS(0x7);
+
+ u32 d3d_cull_mode;
+
+ ComparisonOp depth_test_func;
+
+ INSERT_PADDING_WORDS(0xB);
struct {
u32 separate_alpha;
@@ -453,7 +512,17 @@ public:
u32 enable[NumRenderTargets];
} blend;
- INSERT_PADDING_WORDS(0x77);
+ INSERT_PADDING_WORDS(0xB);
+
+ union {
+ BitField<4, 1, u32> triangle_rast_flip;
+ } screen_y_control;
+
+ INSERT_PADDING_WORDS(0x21);
+
+ u32 vb_element_base;
+
+ INSERT_PADDING_WORDS(0x49);
struct {
u32 tsc_address_high;
@@ -479,7 +548,12 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x22);
+ INSERT_PADDING_WORDS(0x21);
+
+ union {
+ BitField<2, 1, u32> coord_origin;
+ BitField<3, 10, u32> enable;
+ } point_coord_replace;
struct {
u32 code_address_high;
@@ -534,7 +608,27 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS(0xC7);
+ INSERT_PADDING_WORDS(0x7);
+
+ INSERT_PADDING_WORDS(0x46);
+
+ Cull cull;
+
+ INSERT_PADDING_WORDS(0x2B);
+
+ union {
+ u32 raw;
+ BitField<0, 1, u32> Z;
+ BitField<1, 1, u32> S;
+ BitField<2, 1, u32> R;
+ BitField<3, 1, u32> G;
+ BitField<4, 1, u32> B;
+ BitField<5, 1, u32> A;
+ BitField<6, 4, u32> RT;
+ BitField<10, 11, u32> layer;
+ } clear_buffers;
+
+ INSERT_PADDING_WORDS(0x4B);
struct {
u32 query_address_high;
@@ -716,6 +810,9 @@ private:
/// Handles writes to the macro uploading registers.
void ProcessMacroUpload(u32 data);
+ /// Handles a write to the CLEAR_BUFFERS register.
+ void ProcessClearBuffers();
+
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
@@ -738,16 +835,27 @@ ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform[0], 0x280);
ASSERT_REG_POSITION(viewport, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(clear_color[0], 0x360);
+ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
+ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
+ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
+ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
+ASSERT_REG_POSITION(screen_y_control, 0x4EB);
+ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(cull, 0x646);
+ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..6e740713f
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,73 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/memory.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra {
+namespace Engines {
+
+MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {} // Only binds the memory manager reference.
+
+void MaxwellDMA::WriteReg(u32 method, u32 value) { // Stores value in register `method`.
+ ASSERT_MSG(method < Regs::NUM_REGS,
+ "Invalid MaxwellDMA register, increase the size of the Regs structure");
+ // Every write is recorded in the raw register array before any handling.
+ regs.reg_array[method] = value;
+ // Converts a Regs field name into its index in u32 units.
+#define MAXWELLDMA_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
+ // Only a write to `exec` has a further effect: it triggers the copy.
+ switch (method) {
+ case MAXWELLDMA_REG_INDEX(exec): {
+ HandleCopy();
+ break;
+ }
+ }
+
+#undef MAXWELLDMA_REG_INDEX
+}
+
+void MaxwellDMA::HandleCopy() { // Performs the copy described by the current register state.
+ LOG_WARNING(HW_GPU, "Requested a DMA copy");
+ // Read the source and destination GPU addresses from the registers.
+ const GPUVAddr source = regs.src_address.Address();
+ const GPUVAddr dest = regs.dst_address.Address();
+ // NOTE(review): both translation results are dereferenced without a check - confirm they cannot be empty.
+ const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
+ const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+ // Only the single configuration asserted below is handled.
+ // TODO(Subv): Perform more research and implement all features of this engine.
+ ASSERT(regs.exec.enable_swizzle == 0);
+ ASSERT(regs.exec.enable_2d == 1);
+ ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
+ ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
+ ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
+ ASSERT(regs.src_params.pos_x == 0);
+ ASSERT(regs.src_params.pos_y == 0);
+ ASSERT(regs.dst_params.pos_x == 0);
+ ASSERT(regs.dst_params.pos_y == 0);
+ // Same layout flag on both sides (both linear or both not): raw copy of x_count * y_count (presumably bytes).
+ if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
+ Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
+ return;
+ }
+ // Layouts differ, so the data is converted through pointers obtained from Memory::GetPointer.
+ u8* src_buffer = Memory::GetPointer(source_cpu);
+ u8* dst_buffer = Memory::GetPointer(dest_cpu);
+
+ if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
+ dst_buffer, true, regs.src_params.BlockHeight());
+ } else {
+ // If the input is linear and the output is tiled, swizzle the input and copy it over.
+ Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
+ src_buffer, false, regs.dst_params.BlockHeight());
+ }
+}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,155 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+namespace Engines {
+
+class MaxwellDMA final { // Register state and copy handling for the Maxwell DMA engine.
+public:
+ explicit MaxwellDMA(MemoryManager& memory_manager);
+ ~MaxwellDMA() = default;
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value);
+
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0x1D6; // Number of u32 entries in reg_array.
+ // Per-surface parameters: block-size bitfields, three size words, pos_z and a packed x/y position.
+ struct Parameters {
+ union {
+ BitField<0, 4, u32> block_depth;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_width;
+ };
+ u32 size_x;
+ u32 size_y;
+ u32 size_z;
+ u32 pos_z;
+ union {
+ BitField<0, 16, u32> pos_x;
+ BitField<16, 16, u32> pos_y;
+ };
+ // block_height is used as a shift amount: the result is 1 << block_height.
+ u32 BlockHeight() const {
+ return 1 << block_height;
+ }
+ };
+ // 24 bytes, i.e. six 4-byte words.
+ static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+
+ enum class CopyMode : u32 { // HandleCopy() asserts that copy_mode is Unk2.
+ None = 0,
+ Unk1 = 1,
+ Unk2 = 2,
+ };
+
+ enum class QueryMode : u32 { // HandleCopy() asserts that query_mode is None.
+ None = 0,
+ Short = 1,
+ Long = 2,
+ };
+
+ enum class QueryIntr : u32 { // HandleCopy() asserts that query_intr is None.
+ None = 0,
+ Block = 1,
+ NonBlock = 2,
+ };
+ // Two overlapping views of the same storage: named fields and a raw u32 array.
+ union {
+ struct {
+ INSERT_PADDING_WORDS(0xC0);
+ // Control word at register index 0xC0; WriteReg calls HandleCopy() when it is written.
+ struct {
+ union {
+ BitField<0, 2, CopyMode> copy_mode;
+ BitField<2, 1, u32> flush;
+
+ BitField<3, 2, QueryMode> query_mode;
+ BitField<5, 2, QueryIntr> query_intr;
+
+ BitField<7, 1, u32> is_src_linear;
+ BitField<8, 1, u32> is_dst_linear;
+
+ BitField<9, 1, u32> enable_2d;
+ BitField<10, 1, u32> enable_swizzle;
+ };
+ } exec;
+
+ INSERT_PADDING_WORDS(0x3F);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ // Combines address_high (upper 32 bits) and address_low into one GPU virtual address.
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } src_address;
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ // Combines address_high (upper 32 bits) and address_low into one GPU virtual address.
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } dst_address;
+
+ u32 src_pitch;
+ u32 dst_pitch;
+ u32 x_count;
+ u32 y_count;
+
+ INSERT_PADDING_WORDS(0xBB);
+
+ Parameters dst_params;
+
+ INSERT_PADDING_WORDS(1);
+
+ Parameters src_params;
+ // NOTE(review): this view ends at word 0x1E3 (0x1D0 + 0x13) but reg_array has 0x1D6 entries - confirm.
+ INSERT_PADDING_WORDS(0x13);
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+ // Used by HandleCopy() to translate GPU addresses to CPU addresses.
+ MemoryManager& memory_manager;
+
+private:
+ /// Performs the copy from the source buffer to the destination buffer as configured in the
+ /// registers.
+ void HandleCopy();
+};
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+ // Positions are register indices; the macro multiplies by 4 to compare against byte offsets.
+ASSERT_REG_POSITION(exec, 0xC0);
+ASSERT_REG_POSITION(src_address, 0x100);
+ASSERT_REG_POSITION(dst_address, 0x102);
+ASSERT_REG_POSITION(src_pitch, 0x104);
+ASSERT_REG_POSITION(dst_pitch, 0x105);
+ASSERT_REG_POSITION(x_count, 0x106);
+ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(dst_params, 0x1C3);
+ASSERT_REG_POSITION(src_params, 0x1CA);
+ // The helper macro is undefined again right after the checks.
+#undef ASSERT_REG_POSITION
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ec8dbd370..2bc1782ad 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
GreaterThan = 4,
NotEqual = 5,
GreaterEqual = 6,
+ NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -165,7 +166,7 @@ enum class SubOp : u64 {
Lg2 = 0x3,
Rcp = 0x4,
Rsq = 0x5,
- Min = 0x8,
+ Sqrt = 0x8,
};
enum class F2iRoundingOp : u64 {
@@ -193,6 +194,13 @@ enum class UniformType : u64 {
Double = 5,
};
+enum class IMinMaxExchange : u64 { // Type of the 2-bit imnmx.exchange field (starting at bit 43).
+ None = 0,
+ XLo = 1,
+ XMed = 2,
+ XHi = 3,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -209,20 +217,19 @@ union Instruction {
} pred;
BitField<19, 1, u64> negate_pred;
BitField<20, 8, Register> gpr20;
- BitField<20, 7, SubOp> sub_op;
+ BitField<20, 4, SubOp> sub_op;
BitField<28, 8, Register> gpr28;
BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode;
- BitField<50, 1, u64> saturate_a;
union {
BitField<20, 19, u64> imm20_19;
- BitField<20, 32, u64> imm20_32;
+ BitField<20, 32, s64> imm20_32;
BitField<45, 1, u64> negate_b;
BitField<46, 1, u64> abs_a;
BitField<48, 1, u64> negate_a;
BitField<49, 1, u64> abs_b;
- BitField<50, 1, u64> abs_d;
+ BitField<50, 1, u64> saturate_d;
BitField<56, 1, u64> negate_imm;
union {
@@ -231,10 +238,18 @@ union Instruction {
} fmnmx;
union {
+ BitField<39, 1, u64> invert_a;
+ BitField<40, 1, u64> invert_b;
+ BitField<41, 2, LogicOperation> operation;
+ BitField<44, 2, u64> unk44;
+ BitField<48, 3, Pred> pred48;
+ } lop;
+
+ union {
BitField<53, 2, LogicOperation> operation;
BitField<55, 1, u64> invert_a;
BitField<56, 1, u64> invert_b;
- } lop;
+ } lop32i;
float GetImm20_19() const {
float result{};
@@ -247,7 +262,7 @@ union Instruction {
float GetImm20_32() const {
float result{};
- u32 imm{static_cast<u32>(imm20_32)};
+ s32 imm{static_cast<s32>(imm20_32)};
std::memcpy(&result, &imm, sizeof(imm));
return result;
}
@@ -271,6 +286,18 @@ union Instruction {
} alu_integer;
union {
+ BitField<39, 3, u64> pred;
+ BitField<42, 1, u64> negate_pred;
+ BitField<43, 2, IMinMaxExchange> exchange;
+ BitField<48, 1, u64> is_signed;
+ } imnmx;
+
+ union {
+ BitField<54, 1, u64> saturate;
+ BitField<56, 1, u64> negate_a;
+ } iadd32i;
+
+ union {
BitField<20, 8, u64> shift_position;
BitField<28, 8, u64> shift_length;
BitField<48, 1, u64> negate_b;
@@ -316,6 +343,19 @@ union Instruction {
} isetp;
union {
+ BitField<0, 3, u64> pred0;
+ BitField<3, 3, u64> pred3;
+ BitField<12, 3, u64> pred12;
+ BitField<15, 1, u64> neg_pred12;
+ BitField<24, 2, PredOperation> cond;
+ BitField<29, 3, u64> pred29;
+ BitField<32, 1, u64> neg_pred29;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<45, 2, PredOperation> op;
+ } psetp;
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -339,7 +379,8 @@ union Instruction {
} iset;
union {
- BitField<10, 2, Register::Size> size;
+ BitField<8, 2, Register::Size> dest_size;
+ BitField<10, 2, Register::Size> src_size;
BitField<12, 1, u64> is_output_signed;
BitField<13, 1, u64> is_input_signed;
BitField<41, 2, u64> selector;
@@ -359,7 +400,7 @@ union Instruction {
BitField<31, 4, u64> component_mask;
bool IsComponentEnabled(size_t component) const {
- return ((1 << component) & component_mask) != 0;
+ return ((1ull << component) & component_mask) != 0;
}
} tex;
@@ -378,7 +419,7 @@ union Instruction {
ASSERT(component_mask_selector < mask.size());
- return ((1 << component) & mask[component_mask_selector]) != 0;
+ return ((1ull << component) & mask[component_mask_selector]) != 0;
}
} texs;
@@ -424,6 +465,8 @@ public:
enum class Id {
KIL,
SSY,
+ SYNC,
+ DEPBAR,
BFE_C,
BFE_R,
BFE_IMM,
@@ -451,6 +494,7 @@ public:
IADD_C,
IADD_R,
IADD_IMM,
+ IADD32I,
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
@@ -470,6 +514,9 @@ public:
I2I_C,
I2I_R,
I2I_IMM,
+ LOP_C,
+ LOP_R,
+ LOP_IMM,
LOP32I,
MOV_C,
MOV_R,
@@ -509,12 +556,14 @@ public:
enum class Type {
Trivial,
Arithmetic,
+ ArithmeticImmediate,
ArithmeticInteger,
+ ArithmeticIntegerImmediate,
Bfe,
- Logic,
Shift,
Ffma,
Flow,
+ Synch,
Memory,
FloatSet,
FloatSetPredicate,
@@ -619,10 +668,12 @@ private:
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
+ INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
+ INST("1111000011111---", Id::SYNC, Type::Synch, "SYNC"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
- INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
+ INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
@@ -638,10 +689,11 @@ private:
INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
- INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+ INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
+ INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
@@ -658,17 +710,20 @@ private:
INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
- INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
+ INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
- INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "FMNMX_IMM"),
- INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "FMNMX_IMM"),
- INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
+ INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
+ INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
+ INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
- INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+ INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
+ INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
+ INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+ INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),