summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader
diff options
context:
space:
mode:
authorEmmanuel Gil Peyrot <linkmauve@linkmauve.fr>2016-09-18 02:38:01 +0200
committerEmmanuel Gil Peyrot <linkmauve@linkmauve.fr>2016-09-18 02:38:01 +0200
commitdc8479928c5aee4c6ad6fe4f59006fb604cee701 (patch)
tree569a7f13128450bbab973236615587ff00bced5f /src/video_core/shader
parentTravis: Import Dolphin’s clang-format hook. (diff)
downloadyuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.gz
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.bz2
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.lz
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.xz
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.tar.zst
yuzu-dc8479928c5aee4c6ad6fe4f59006fb604cee701.zip
Diffstat (limited to '')
-rw-r--r--src/video_core/shader/shader.cpp39
-rw-r--r--src/video_core/shader/shader.h92
-rw-r--r--src/video_core/shader/shader_interpreter.cpp290
-rw-r--r--src/video_core/shader/shader_interpreter.h5
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp212
-rw-r--r--src/video_core/shader/shader_jit_x64.h8
6 files changed, 335 insertions, 311 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index f565e2c91..852c5a9a0 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
const auto& output_register_map = g_state.regs.vs_output_attributes[index];
- u32 semantics[4] = {
- output_register_map.map_x, output_register_map.map_y,
- output_register_map.map_z, output_register_map.map_w
- };
+ u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
+ output_register_map.map_z, output_register_map.map_w};
for (unsigned comp = 0; comp < 4; ++comp) {
float24* out = ((float24*)&ret) + semantics[comp];
@@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
index++;
}
- // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
+ // The hardware takes the absolute and saturates vertex colors like this, *before* doing
+ // interpolation
for (unsigned i = 0; i < 4; ++i) {
- ret.color[i] = float24::FromFloat32(
- std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
+ ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
}
LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
- "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
- ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
- ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
- ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
- ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
- ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
+ "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
+ ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
+ ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
+ ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
+ ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
+ ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
+ ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
return ret;
}
@@ -96,8 +95,9 @@ void ClearCache() {
void ShaderSetup::Setup() {
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) {
- u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
- Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
+ u64 cache_key =
+ (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
+ Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
auto iter = shader_map.find(cache_key);
if (iter != shader_map.end()) {
@@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
const auto& attribute_register_map = config.input_register_map;
for (unsigned i = 0; i < num_attributes; i++)
- state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
+ state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.conditional_code[0] = false;
state.conditional_code[1] = false;
@@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
#else
RunInterpreter(setup, state, config.main_offset);
#endif // ARCHITECTURE_x86_64
-
}
-DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
+DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
+ const Regs::ShaderConfig& config,
+ const ShaderSetup& setup) {
UnitState<true> state;
state.debug.max_offset = 0;
@@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
boost::fill(state.registers.input, &dummy_register);
for (unsigned i = 0; i < num_attributes; i++)
- state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
+ state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
state.conditional_code[0] = false;
state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index fee16df62..830d933a8 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -94,46 +94,46 @@ struct OutputRegisters {
static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
// Helper structure used to keep track of data useful for inspection of shader emulation
-template<bool full_debugging>
+template <bool full_debugging>
struct DebugData;
-template<>
+template <>
struct DebugData<false> {
// TODO: Hide these behind and interface and move them to DebugData<true>
- u32 max_offset; // maximum program counter ever reached
+ u32 max_offset; // maximum program counter ever reached
u32 max_opdesc_id; // maximum swizzle pattern index ever used
};
-template<>
+template <>
struct DebugData<true> {
// Records store the input and output operands of a particular instruction.
struct Record {
enum Type {
// Floating point arithmetic operands
- SRC1 = 0x1,
- SRC2 = 0x2,
- SRC3 = 0x4,
+ SRC1 = 0x1,
+ SRC2 = 0x2,
+ SRC3 = 0x4,
// Initial and final output operand value
- DEST_IN = 0x8,
- DEST_OUT = 0x10,
+ DEST_IN = 0x8,
+ DEST_OUT = 0x10,
// Current and next instruction offset (in words)
- CUR_INSTR = 0x20,
- NEXT_INSTR = 0x40,
+ CUR_INSTR = 0x20,
+ NEXT_INSTR = 0x40,
// Output address register value
ADDR_REG_OUT = 0x80,
// Result of a comparison instruction
- CMP_RESULT = 0x100,
+ CMP_RESULT = 0x100,
// Input values for conditional flow control instructions
COND_BOOL_IN = 0x200,
- COND_CMP_IN = 0x400,
+ COND_CMP_IN = 0x400,
// Input values for a loop
- LOOP_INT_IN = 0x800,
+ LOOP_INT_IN = 0x800,
};
Math::Vec4<float24> src1;
@@ -156,7 +156,7 @@ struct DebugData<true> {
unsigned mask = 0;
};
- u32 max_offset; // maximum program counter ever reached
+ u32 max_offset; // maximum program counter ever reached
u32 max_opdesc_id; // maximum swizzle pattern index ever used
// List of records for each executed shader instruction
@@ -167,10 +167,10 @@ struct DebugData<true> {
using DebugDataRecord = DebugData<true>::Record;
// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
-template<DebugDataRecord::Type type, typename ValueType>
+template <DebugDataRecord::Type type, typename ValueType>
inline void SetField(DebugDataRecord& record, ValueType value);
-template<>
+template <>
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
record.src1.x = value[0];
record.src1.y = value[1];
@@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
record.src1.w = value[3];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
record.src2.x = value[0];
record.src2.y = value[1];
@@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
record.src2.w = value[3];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
record.src3.x = value[0];
record.src3.y = value[1];
@@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
record.src3.w = value[3];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
record.dest_in.x = value[0];
record.dest_in.y = value[1];
@@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
record.dest_in.w = value[3];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
record.dest_out.x = value[0];
record.dest_out.y = value[1];
@@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
record.dest_out.w = value[3];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
record.address_registers[0] = value[0];
record.address_registers[1] = value[1];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
record.conditional_code[0] = value[0];
record.conditional_code[1] = value[1];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
record.cond_bool = value;
}
-template<>
+template <>
inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
record.cond_cmp[0] = value[0];
record.cond_cmp[1] = value[1];
}
-template<>
+template <>
inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
record.loop_int = value;
}
-template<>
+template <>
inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
record.instruction_offset = value;
}
-template<>
+template <>
inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
record.next_instruction = value;
}
// Helper function to set debug information on the current shader iteration.
-template<DebugDataRecord::Type type, typename ValueType>
+template <DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
// Debugging disabled => nothing to do
}
-template<DebugDataRecord::Type type, typename ValueType>
+template <DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
if (offset >= debug_data.records.size())
debug_data.records.resize(offset + 1);
- SetField<type, ValueType>(debug_data.records[offset], value);
- debug_data.records[offset].mask |= type;
+ SetField<type, ValueType>(debug_data.records[offset], value);
+ debug_data.records[offset].mask |= type;
}
-
/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a
* single shader unit that processes all shaders serially. Putting the state information in a struct
* here will make it easier for us to parallelize the shader processing later.
*/
-template<bool Debug>
+template <bool Debug>
struct UnitState {
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
@@ -293,10 +292,12 @@ struct UnitState {
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
- return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.input) +
+ reg.GetIndex() * sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) +
+ reg.GetIndex() * sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -307,10 +308,12 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Output:
- return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, output_registers.value) +
+ reg.GetIndex() * sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) +
+ reg.GetIndex() * sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -336,13 +339,13 @@ struct ShaderSetup {
static size_t UniformOffset(RegisterType type, unsigned index) {
switch (type) {
case RegisterType::FloatUniform:
- return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
+ return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
case RegisterType::BoolUniform:
- return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
+ return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
case RegisterType::IntUniform:
- return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
+ return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
default:
UNREACHABLE();
@@ -354,7 +357,8 @@ struct ShaderSetup {
std::array<u32, 1024> swizzle_data;
/**
- * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
+ * Performs any shader unit setup that only needs to happen once per shader (as opposed to once
+ * per
* vertex, which would happen within the `Run` function).
*/
void Setup();
@@ -375,8 +379,8 @@ struct ShaderSetup {
* @param setup Setup object for the shader pipeline
* @return Debug information for this shader with regards to the given vertex
*/
- DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
-
+ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
+ const Regs::ShaderConfig& config, const ShaderSetup& setup);
};
} // namespace Shader
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index f6c86a759..681ff9728 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -40,7 +40,7 @@ struct CallStackElement {
u32 loop_address; // The address where we'll return to after each loop iteration
};
-template<bool Debug>
+template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
// TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack;
@@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
}
- const Instruction instr = { program_code[program_counter] };
- const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
+ const Instruction instr = {program_code[program_counter]};
+ const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
- auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
- u32 return_offset, u8 repeat_count, u8 loop_increment) {
- program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
+ auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
+ u32 num_instructions, u32 return_offset,
+ u8 repeat_count, u8 loop_increment) {
+ program_counter =
+ offset -
+ 1; // -1 to make sure when incrementing the PC we end up at the correct offset
ASSERT(call_stack.size() < call_stack.capacity());
- call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
+ call_stack.push_back(
+ {offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
};
Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
if (iteration > 0)
@@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
};
switch (instr.opcode.Value().GetInfo().type) {
- case OpCode::Type::Arithmetic:
- {
- const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+ case OpCode::Type::Arithmetic: {
+ const bool is_inverted =
+ (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
- const int address_offset = (instr.common.address_register_index == 0)
- ? 0 : state.address_registers[instr.common.address_register_index - 1];
+ const int address_offset =
+ (instr.common.address_register_index == 0)
+ ? 0
+ : state.address_registers[instr.common.address_register_index - 1];
- const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
- const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
+ const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
+ (!is_inverted * address_offset));
+ const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
+ (is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
float24 src1[4] = {
- src1_[(int)swizzle.GetSelectorSrc1(0)],
- src1_[(int)swizzle.GetSelectorSrc1(1)],
- src1_[(int)swizzle.GetSelectorSrc1(2)],
- src1_[(int)swizzle.GetSelectorSrc1(3)],
+ src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
+ src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
};
if (negate_src1) {
src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = src1[3] * float24::FromFloat32(-1);
}
float24 src2[4] = {
- src2_[(int)swizzle.GetSelectorSrc2(0)],
- src2_[(int)swizzle.GetSelectorSrc2(1)],
- src2_[(int)swizzle.GetSelectorSrc2(2)],
- src2_[(int)swizzle.GetSelectorSrc2(3)],
+ src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
+ src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
};
if (negate_src2) {
src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src2[3] = src2[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
- : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
- : dummy_vec4_float24;
+ float24* dest =
+ (instr.common.dest.Value() < 0x10)
+ ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
+ : (instr.common.dest.Value() < 0x20)
+ ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
+ : dummy_vec4_float24;
- state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
+ state.debug.max_opdesc_id =
+ std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
switch (instr.opcode.Value().EffectiveOpCode()) {
- case OpCode::Id::ADD:
- {
+ case OpCode::Id::ADD: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
- case OpCode::Id::MUL:
- {
+ case OpCode::Id::MUL: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
case OpCode::Id::DP3:
case OpCode::Id::DP4:
case OpCode::Id::DPH:
- case OpCode::Id::DPHI:
- {
+ case OpCode::Id::DPHI: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = float24::FromFloat32(1.0f);
int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
- float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f));
+ float24 dot = std::inner_product(src1, src1 + num_components, src2,
+ float24::FromFloat32(0.f));
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
// Reciprocal
- case OpCode::Id::RCP:
- {
+ case OpCode::Id::RCP: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
@@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
}
// Reciprocal Square Root
- case OpCode::Id::RSQ:
- {
+ case OpCode::Id::RSQ: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
@@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
- case OpCode::Id::MOVA:
- {
+ case OpCode::Id::MOVA: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
for (int i = 0; i < 2; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
// TODO: Figure out how the rounding is done on hardware
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
}
- Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers);
+ Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
+ state.address_registers);
break;
}
- case OpCode::Id::MOV:
- {
+ case OpCode::Id::MOV: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
for (int i = 0; i < 4; ++i) {
@@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
+ : float24::FromFloat32(0.0f);
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
+ : float24::FromFloat32(0.0f);
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
switch (op) {
- case Instruction::Common::CompareOpType::Equal:
- state.conditional_code[i] = (src1[i] == src2[i]);
- break;
+ case Instruction::Common::CompareOpType::Equal:
+ state.conditional_code[i] = (src1[i] == src2[i]);
+ break;
- case Instruction::Common::CompareOpType::NotEqual:
- state.conditional_code[i] = (src1[i] != src2[i]);
- break;
+ case Instruction::Common::CompareOpType::NotEqual:
+ state.conditional_code[i] = (src1[i] != src2[i]);
+ break;
- case Instruction::Common::CompareOpType::LessThan:
- state.conditional_code[i] = (src1[i] < src2[i]);
- break;
+ case Instruction::Common::CompareOpType::LessThan:
+ state.conditional_code[i] = (src1[i] < src2[i]);
+ break;
- case Instruction::Common::CompareOpType::LessEqual:
- state.conditional_code[i] = (src1[i] <= src2[i]);
- break;
+ case Instruction::Common::CompareOpType::LessEqual:
+ state.conditional_code[i] = (src1[i] <= src2[i]);
+ break;
- case Instruction::Common::CompareOpType::GreaterThan:
- state.conditional_code[i] = (src1[i] > src2[i]);
- break;
+ case Instruction::Common::CompareOpType::GreaterThan:
+ state.conditional_code[i] = (src1[i] > src2[i]);
+ break;
- case Instruction::Common::CompareOpType::GreaterEqual:
- state.conditional_code[i] = (src1[i] >= src2[i]);
- break;
+ case Instruction::Common::CompareOpType::GreaterEqual:
+ state.conditional_code[i] = (src1[i] >= src2[i]);
+ break;
- default:
- LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
- break;
+ default:
+ LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
+ break;
}
}
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
break;
- case OpCode::Id::EX2:
- {
+ case OpCode::Id::EX2: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
- case OpCode::Id::LG2:
- {
+ case OpCode::Id::LG2: {
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(),
+ instr.opcode.Value().GetInfo().name, instr.hex);
DEBUG_ASSERT(false);
break;
}
@@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
}
- case OpCode::Type::MultiplyAdd:
- {
+ case OpCode::Type::MultiplyAdd: {
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
- const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]);
+ const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
+ &swizzle_data[instr.mad.operand_desc_id]);
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
- const int address_offset = (instr.mad.address_register_index == 0)
- ? 0 : state.address_registers[instr.mad.address_register_index - 1];
+ const int address_offset =
+ (instr.mad.address_register_index == 0)
+ ? 0
+ : state.address_registers[instr.mad.address_register_index - 1];
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
- const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
- const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
+ const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
+ (!is_inverted * address_offset));
+ const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
+ (is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
float24 src1[4] = {
- src1_[(int)swizzle.GetSelectorSrc1(0)],
- src1_[(int)swizzle.GetSelectorSrc1(1)],
- src1_[(int)swizzle.GetSelectorSrc1(2)],
- src1_[(int)swizzle.GetSelectorSrc1(3)],
+ src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
+ src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
};
if (negate_src1) {
src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src1[3] = src1[3] * float24::FromFloat32(-1);
}
float24 src2[4] = {
- src2_[(int)swizzle.GetSelectorSrc2(0)],
- src2_[(int)swizzle.GetSelectorSrc2(1)],
- src2_[(int)swizzle.GetSelectorSrc2(2)],
- src2_[(int)swizzle.GetSelectorSrc2(3)],
+ src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
+ src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
};
if (negate_src2) {
src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src2[3] = src2[3] * float24::FromFloat32(-1);
}
float24 src3[4] = {
- src3_[(int)swizzle.GetSelectorSrc3(0)],
- src3_[(int)swizzle.GetSelectorSrc3(1)],
- src3_[(int)swizzle.GetSelectorSrc3(2)],
- src3_[(int)swizzle.GetSelectorSrc3(3)],
+ src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
+ src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
};
if (negate_src3) {
src3[0] = src3[0] * float24::FromFloat32(-1);
@@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
src3[3] = src3[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
- : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
- : dummy_vec4_float24;
+ float24* dest =
+ (instr.mad.dest.Value() < 0x10)
+ ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
+ : (instr.mad.dest.Value() < 0x20)
+ ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
+ : dummy_vec4_float24;
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
@@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
} else {
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(),
+ instr.opcode.Value().GetInfo().name, instr.hex);
}
break;
}
- default:
- {
- static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
- bool results[2] = { refx == state.conditional_code[0],
- refy == state.conditional_code[1] };
+ default: {
+ static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
+ Instruction::FlowControlType flow_control) {
+ bool results[2] = {refx == state.conditional_code[0],
+ refy == state.conditional_code[1]};
switch (flow_control.op) {
case flow_control.Or:
@@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
case OpCode::Id::JMPC:
- Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
- if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
+ Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
+ state.conditional_code);
+ if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
+ instr.flow_control)) {
program_counter = instr.flow_control.dest_offset - 1;
}
break;
case OpCode::Id::JMPU:
- Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
+ Record<DebugDataRecord::COND_BOOL_IN>(
+ state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
- if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
+ if (uniforms.b[instr.flow_control.bool_uniform_id] ==
+ !(instr.flow_control.num_instructions & 1)) {
program_counter = instr.flow_control.dest_offset - 1;
}
break;
case OpCode::Id::CALL:
- call(state,
- instr.flow_control.dest_offset,
- instr.flow_control.num_instructions,
+ call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
program_counter + 1, 0, 0);
break;
case OpCode::Id::CALLU:
- Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
+ Record<DebugDataRecord::COND_BOOL_IN>(
+ state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
- call(state,
- instr.flow_control.dest_offset,
- instr.flow_control.num_instructions,
- program_counter + 1, 0, 0);
+ call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
+ program_counter + 1, 0, 0);
}
break;
case OpCode::Id::CALLC:
- Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
- if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
- call(state,
- instr.flow_control.dest_offset,
- instr.flow_control.num_instructions,
- program_counter + 1, 0, 0);
+ Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
+ state.conditional_code);
+ if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
+ instr.flow_control)) {
+ call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
+ program_counter + 1, 0, 0);
}
break;
@@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
break;
case OpCode::Id::IFU:
- Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
+ Record<DebugDataRecord::COND_BOOL_IN>(
+ state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
if (uniforms.b[instr.flow_control.bool_uniform_id]) {
- call(state,
- program_counter + 1,
+ call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter - 1,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
+ 0);
} else {
- call(state,
- instr.flow_control.dest_offset,
- instr.flow_control.num_instructions,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
+ call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
+ 0);
}
break;
- case OpCode::Id::IFC:
- {
+ case OpCode::Id::IFC: {
// TODO: Do we need to consider swizzlers here?
- Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
- if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
- call(state,
- program_counter + 1,
+ Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
+ state.conditional_code);
+ if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
+ instr.flow_control)) {
+ call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter - 1,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
+ 0);
} else {
- call(state,
- instr.flow_control.dest_offset,
- instr.flow_control.num_instructions,
- instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
+ call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
+ instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
+ 0);
}
break;
}
- case OpCode::Id::LOOP:
- {
+ case OpCode::Id::LOOP: {
Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
uniforms.i[instr.flow_control.int_uniform_id].y,
uniforms.i[instr.flow_control.int_uniform_id].z,
@@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
state.address_registers[2] = loop_param.y;
Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
- call(state,
- program_counter + 1,
+ call(state, program_counter + 1,
instr.flow_control.dest_offset - program_counter + 1,
- instr.flow_control.dest_offset + 1,
- loop_param.x,
- loop_param.z);
+ instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
break;
}
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(),
+ instr.opcode.Value().GetInfo().name, instr.hex);
break;
}
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index bb3ce1c6e..48ede0a2e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -8,9 +8,10 @@ namespace Pica {
namespace Shader {
-template <bool Debug> struct UnitState;
+template <bool Debug>
+struct UnitState;
-template<bool Debug>
+template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 43e7e6b4c..04e04ba1a 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -31,70 +31,70 @@ using namespace Gen;
typedef void (JitShader::*JitFunction)(Instruction instr);
const JitFunction instr_table[64] = {
- &JitShader::Compile_ADD, // add
- &JitShader::Compile_DP3, // dp3
- &JitShader::Compile_DP4, // dp4
- &JitShader::Compile_DPH, // dph
- nullptr, // unknown
- &JitShader::Compile_EX2, // ex2
- &JitShader::Compile_LG2, // lg2
- nullptr, // unknown
- &JitShader::Compile_MUL, // mul
- &JitShader::Compile_SGE, // sge
- &JitShader::Compile_SLT, // slt
- &JitShader::Compile_FLR, // flr
- &JitShader::Compile_MAX, // max
- &JitShader::Compile_MIN, // min
- &JitShader::Compile_RCP, // rcp
- &JitShader::Compile_RSQ, // rsq
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_MOVA, // mova
- &JitShader::Compile_MOV, // mov
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_DPH, // dphi
- nullptr, // unknown
- &JitShader::Compile_SGE, // sgei
- &JitShader::Compile_SLT, // slti
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_NOP, // nop
- &JitShader::Compile_END, // end
- nullptr, // break
- &JitShader::Compile_CALL, // call
- &JitShader::Compile_CALLC, // callc
- &JitShader::Compile_CALLU, // callu
- &JitShader::Compile_IF, // ifu
- &JitShader::Compile_IF, // ifc
- &JitShader::Compile_LOOP, // loop
- nullptr, // emit
- nullptr, // sete
- &JitShader::Compile_JMP, // jmpc
- &JitShader::Compile_JMP, // jmpu
- &JitShader::Compile_CMP, // cmp
- &JitShader::Compile_CMP, // cmp
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_ADD, // add
+ &JitShader::Compile_DP3, // dp3
+ &JitShader::Compile_DP4, // dp4
+ &JitShader::Compile_DPH, // dph
+ nullptr, // unknown
+ &JitShader::Compile_EX2, // ex2
+ &JitShader::Compile_LG2, // lg2
+ nullptr, // unknown
+ &JitShader::Compile_MUL, // mul
+ &JitShader::Compile_SGE, // sge
+ &JitShader::Compile_SLT, // slt
+ &JitShader::Compile_FLR, // flr
+ &JitShader::Compile_MAX, // max
+ &JitShader::Compile_MIN, // min
+ &JitShader::Compile_RCP, // rcp
+ &JitShader::Compile_RSQ, // rsq
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_MOVA, // mova
+ &JitShader::Compile_MOV, // mov
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_DPH, // dphi
+ nullptr, // unknown
+ &JitShader::Compile_SGE, // sgei
+ &JitShader::Compile_SLT, // slti
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_NOP, // nop
+ &JitShader::Compile_END, // end
+ nullptr, // break
+ &JitShader::Compile_CALL, // call
+ &JitShader::Compile_CALLC, // callc
+ &JitShader::Compile_CALLU, // callu
+ &JitShader::Compile_IF, // ifu
+ &JitShader::Compile_IF, // ifc
+ &JitShader::Compile_LOOP, // loop
+ nullptr, // emit
+ nullptr, // sete
+ &JitShader::Compile_JMP, // jmpc
+ &JitShader::Compile_JMP, // jmpu
+ &JitShader::Compile_CMP, // cmp
+ &JitShader::Compile_CMP, // cmp
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
};
// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15;
// State registers that must not be modified by external functions calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = {
- SETUP, STATE, // Pointers to register blocks
+ SETUP, STATE, // Pointers to register blocks
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
- ONE+16, NEGBIT+16, // Constants
+ ONE + 16, NEGBIT + 16, // Constants
};
/// Raw constant for the source register selector that indicates no swizzling is performed
@@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
* @return Instruction at the specified offset
*/
static Instruction GetVertexShaderInstruction(size_t offset) {
- return { g_state.vs.program_code[offset] };
+ return {g_state.vs.program_code[offset]};
}
static void LogCritical(const char* msg) {
@@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
* @param src_reg SourceRegister object corresponding to the source register to load
* @param dest Destination XMM register to store the loaded, swizzled source register
*/
-void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
+void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
+ X64Reg dest) {
X64Reg src_ptr;
size_t src_offset;
@@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
unsigned operand_desc_id;
- const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+ const bool is_inverted =
+ (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
unsigned address_register_index;
unsigned offset_src;
@@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
}
- SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
+ SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
// Generate instructions for source register swizzling as needed
u8 sel = swiz.GetRawSelector(src_num);
@@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
}
// If the source register should be negated, flip the negative bit using XOR
- const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 };
+ const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
if (negate[src_num - 1]) {
XORPS(dest, R(NEGBIT));
}
}
-void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
+void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
DestRegister dest;
unsigned operand_desc_id;
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
dest = instr.common.dest.Value();
}
- SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
+ SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
- ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type");
+ ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
+ "Destinaton offset too large for int type");
// If all components are enabled, write the result to the destination register
if (swiz.dest_mask == NO_DEST_REG_MASK) {
@@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
MOVAPS(MDisp(STATE, dest_offset_disp), src);
} else {
- // Not all components are enabled, so mask the result when storing to the destination register...
+ // Not all components are enabled, so mask the result when storing to the destination
+ // register...
MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
if (Common::GetCPUCaps().sse4_1) {
- u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
+ u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
+ ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
BLENDPS(SCRATCH, R(src), mask);
} else {
MOVAPS(SCRATCH2, R(src));
UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
- UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
+ UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
- // Compute selector to selectively copy source components to destination for SHUFPS instruction
+ // Compute selector to selectively copy source components to destination for SHUFPS
+ // instruction
u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
@@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
}
void JitShader::Compile_UniformCondition(Instruction instr) {
- int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
+ int offset =
+ ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
}
@@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) {
}
void JitShader::Compile_MOVA(Instruction instr) {
- SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
+ SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
return; // NoOp
@@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) {
// Call the subroutine
FixupBranch b = CALL();
- fixup_branches.push_back({ b, instr.flow_control.dest_offset });
+ fixup_branches.push_back({b, instr.flow_control.dest_offset});
// Skip over the return offset that's on the stack
ADD(64, R(RSP), Imm32(8));
@@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) {
// SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
// emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
// because they don't match when used with NaNs.
- static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE };
+ static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
@@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) {
}
void JitShader::Compile_IF(Instruction instr) {
- Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
+ Compile_Assert(instr.flow_control.dest_offset >= program_counter,
+ "Backwards if-statements not supported");
// Evaluate the "IF" condition
if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) {
}
void JitShader::Compile_LOOP(Instruction instr) {
- Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
+ Compile_Assert(instr.flow_control.dest_offset >= program_counter,
+ "Backwards loops not supported");
Compile_Assert(!looping, "Nested loops not supported");
looping = true;
- int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
+ int offset =
+ ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
MOV(32, R(LOOPINC), R(LOOPCOUNT));
SHR(32, R(LOOPINC), Imm8(16));
- MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
+ MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
- ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
+ ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
auto loop_start = GetCodePtr();
Compile_Block(instr.flow_control.dest_offset + 1);
ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
- SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
- J_CC(CC_NZ, loop_start); // Loop if not equal
+ SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
+ J_CC(CC_NZ, loop_start); // Loop if not equal
looping = false;
}
@@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) {
else
UNREACHABLE();
- bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
- (instr.flow_control.num_instructions & 1);
+ bool inverted_condition =
+ (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
- fixup_branches.push_back({ b, instr.flow_control.dest_offset });
+ fixup_branches.push_back({b, instr.flow_control.dest_offset});
}
void JitShader::Compile_Block(unsigned end) {
@@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() {
Compile_Return();
}
- ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
+ ASSERT_MSG(code_ptr[program_counter] == nullptr,
+ "Tried to compile already compiled shader location!");
code_ptr[program_counter] = GetCodePtr();
Instruction instr = GetVertexShaderInstruction(program_counter++);
@@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() {
} else {
// Unhandled instruction
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
- instr.opcode.Value().EffectiveOpCode(), instr.hex);
+ instr.opcode.Value().EffectiveOpCode(), instr.hex);
}
}
@@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() {
case OpCode::Id::CALL:
case OpCode::Id::CALLC:
case OpCode::Id::CALLU:
- return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
+ return_offsets.push_back(instr.flow_control.dest_offset +
+ instr.flow_control.num_instructions);
break;
default:
break;
@@ -835,12 +847,12 @@ void JitShader::Compile() {
XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
// Used to set a register to one
- static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
+ static const __m128 one = {1.f, 1.f, 1.f, 1.f};
MOV(PTRBITS, R(RAX), ImmPtr(&one));
MOVAPS(ONE, MatR(RAX));
// Used to negate registers
- static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
+ static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
MOVAPS(NEGBIT, MatR(RAX));
@@ -850,7 +862,8 @@ void JitShader::Compile() {
// Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
- // Set the target for any incomplete branches now that the entire shader program has been emitted
+ // Set the target for any incomplete branches now that the entire shader program has been
+ // emitted
for (const auto& branch : fixup_branches) {
SetJumpTarget(branch.first, code_ptr[branch.second]);
}
@@ -861,7 +874,8 @@ void JitShader::Compile() {
fixup_branches.clear();
fixup_branches.shrink_to_fit();
- uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
+ uintptr_t size =
+ reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5468459d4..2f37ef8bf 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -70,11 +70,11 @@ public:
void Compile_MAD(Instruction instr);
private:
-
void Compile_Block(unsigned end);
void Compile_NextInstr();
- void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
+ void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
+ Gen::X64Reg dest);
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
/**
@@ -111,8 +111,8 @@ private:
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
- unsigned program_counter = 0; ///< Offset of the next instruction to decode
- bool looping = false; ///< True if compiling a loop, used to check for nested loops
+ unsigned program_counter = 0; ///< Offset of the next instruction to decode
+ bool looping = false; ///< True if compiling a loop, used to check for nested loops
/// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;