From dc8479928c5aee4c6ad6fe4f59006fb604cee701 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 18 Sep 2016 09:38:01 +0900 Subject: Sources: Run clang-format on everything. --- src/video_core/shader/shader.cpp | 39 ++-- src/video_core/shader/shader.h | 92 +++++---- src/video_core/shader/shader_interpreter.cpp | 290 ++++++++++++++------------- src/video_core/shader/shader_interpreter.h | 5 +- src/video_core/shader/shader_jit_x64.cpp | 212 +++++++++++--------- src/video_core/shader/shader_jit_x64.h | 8 +- 6 files changed, 335 insertions(+), 311 deletions(-) (limited to 'src/video_core/shader') diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index f565e2c91..852c5a9a0 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { const auto& output_register_map = g_state.regs.vs_output_attributes[index]; - u32 semantics[4] = { - output_register_map.map_x, output_register_map.map_y, - output_register_map.map_z, output_register_map.map_w - }; + u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, + output_register_map.map_z, output_register_map.map_w}; for (unsigned comp = 0; comp < 4; ++comp) { float24* out = ((float24*)&ret) + semantics[comp]; @@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { index++; } - // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation + // The hardware takes the absolute and saturates vertex colors like this, *before* doing + // interpolation for (unsigned i = 0; i < 4; ++i) { - ret.color[i] = float24::FromFloat32( - std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); + ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " - "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", - ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), - ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), - ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), - ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", + ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), + ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), + ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), + ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(), + ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } @@ -96,8 +95,9 @@ void ClearCache() { void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ - Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); + u64 cache_key = + (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ + Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num const auto& attribute_register_map = config.input_register_map; for (unsigned i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; @@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num #else RunInterpreter(setup, state, config.main_offset); #endif // ARCHITECTURE_x86_64 - } -DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { +DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, + const Regs::ShaderConfig& config, + const ShaderSetup& setup) { UnitState state; state.debug.max_offset = 0; @@ -155,7 +156,7 @@ DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ boost::fill(state.registers.input, &dummy_register); for (unsigned i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index fee16df62..830d933a8 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -94,46 +94,46 @@ struct OutputRegisters { static_assert(std::is_pod::value, "Structure is not POD"); // Helper structure used to keep track of data useful for inspection of shader emulation -template +template struct DebugData; -template<> +template <> struct DebugData { // TODO: Hide these behind and interface and move them to DebugData - u32 max_offset; // maximum program counter ever reached + u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used }; -template<> +template <> struct DebugData { // Records store the input and output operands of a particular instruction. struct Record { enum Type { // Floating point arithmetic operands - SRC1 = 0x1, - SRC2 = 0x2, - SRC3 = 0x4, + SRC1 = 0x1, + SRC2 = 0x2, + SRC3 = 0x4, // Initial and final output operand value - DEST_IN = 0x8, - DEST_OUT = 0x10, + DEST_IN = 0x8, + DEST_OUT = 0x10, // Current and next instruction offset (in words) - CUR_INSTR = 0x20, - NEXT_INSTR = 0x40, + CUR_INSTR = 0x20, + NEXT_INSTR = 0x40, // Output address register value ADDR_REG_OUT = 0x80, // Result of a comparison instruction - CMP_RESULT = 0x100, + CMP_RESULT = 0x100, // Input values for conditional flow control instructions COND_BOOL_IN = 0x200, - COND_CMP_IN = 0x400, + COND_CMP_IN = 0x400, // Input values for a loop - LOOP_INT_IN = 0x800, + LOOP_INT_IN = 0x800, }; Math::Vec4 src1; @@ -156,7 +156,7 @@ struct DebugData { unsigned mask = 0; }; - u32 max_offset; // maximum program counter ever reached + u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used // List of records for each executed shader instruction @@ -167,10 +167,10 @@ struct DebugData { using DebugDataRecord = DebugData::Record; // Helper function to set a DebugData::Record field based on the template enum parameter. -template +template inline void SetField(DebugDataRecord& record, ValueType value); -template<> +template <> inline void SetField(DebugDataRecord& record, float24* value) { record.src1.x = value[0]; record.src1.y = value[1]; @@ -178,7 +178,7 @@ inline void SetField(DebugDataRecord& record, float24* va record.src1.w = value[3]; } -template<> +template <> inline void SetField(DebugDataRecord& record, float24* value) { record.src2.x = value[0]; record.src2.y = value[1]; @@ -186,7 +186,7 @@ inline void SetField(DebugDataRecord& record, float24* va record.src2.w = value[3]; } -template<> +template <> inline void SetField(DebugDataRecord& record, float24* value) { record.src3.x = value[0]; record.src3.y = value[1]; @@ -194,7 +194,7 @@ inline void SetField(DebugDataRecord& record, float24* va record.src3.w = value[3]; } -template<> +template <> inline void SetField(DebugDataRecord& record, float24* value) { record.dest_in.x = value[0]; record.dest_in.y = value[1]; @@ -202,7 +202,7 @@ inline void SetField(DebugDataRecord& record, float24* record.dest_in.w = value[3]; } -template<> +template <> inline void SetField(DebugDataRecord& record, float24* value) { record.dest_out.x = value[0]; record.dest_out.y = value[1]; @@ -210,67 +210,66 @@ inline void SetField(DebugDataRecord& record, float24 record.dest_out.w = value[3]; } -template<> +template <> inline void SetField(DebugDataRecord& record, s32* value) { record.address_registers[0] = value[0]; record.address_registers[1] = value[1]; } -template<> +template <> inline void SetField(DebugDataRecord& record, bool* value) { record.conditional_code[0] = value[0]; record.conditional_code[1] = value[1]; } -template<> +template <> inline void SetField(DebugDataRecord& record, bool value) { record.cond_bool = value; } -template<> +template <> inline void SetField(DebugDataRecord& record, bool* value) { record.cond_cmp[0] = value[0]; record.cond_cmp[1] = value[1]; } -template<> +template <> inline void SetField(DebugDataRecord& record, Math::Vec4 value) { record.loop_int = value; } -template<> +template <> inline void SetField(DebugDataRecord& record, u32 value) { record.instruction_offset = value; } -template<> +template <> inline void SetField(DebugDataRecord& record, u32 value) { record.next_instruction = value; } // Helper function to set debug information on the current shader iteration. -template +template inline void Record(DebugData& debug_data, u32 offset, ValueType value) { // Debugging disabled => nothing to do } -template +template inline void Record(DebugData& debug_data, u32 offset, ValueType value) { if (offset >= debug_data.records.size()) debug_data.records.resize(offset + 1); - SetField(debug_data.records[offset], value); - debug_data.records[offset].mask |= type; + SetField(debug_data.records[offset], value); + debug_data.records[offset].mask |= type; } - /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a * single shader unit that processes all shaders serially. Putting the state information in a struct * here will make it easier for us to parallelize the shader processing later. */ -template +template struct UnitState { struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore @@ -293,10 +292,12 @@ struct UnitState { static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: - return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4); + return offsetof(UnitState, registers.input) + + reg.GetIndex() * sizeof(Math::Vec4); case RegisterType::Temporary: - return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4); + return offsetof(UnitState, registers.temporary) + + reg.GetIndex() * sizeof(Math::Vec4); default: UNREACHABLE(); @@ -307,10 +308,12 @@ struct UnitState { static size_t OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4); + return offsetof(UnitState, output_registers.value) + + reg.GetIndex() * sizeof(Math::Vec4); case RegisterType::Temporary: - return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4); + return offsetof(UnitState, registers.temporary) + + reg.GetIndex() * sizeof(Math::Vec4); default: UNREACHABLE(); @@ -336,13 +339,13 @@ struct ShaderSetup { static size_t UniformOffset(RegisterType type, unsigned index) { switch (type) { case RegisterType::FloatUniform: - return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4); + return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4); case RegisterType::BoolUniform: - return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); + return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); case RegisterType::IntUniform: - return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4); + return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4); default: UNREACHABLE(); @@ -354,7 +357,8 @@ struct ShaderSetup { std::array swizzle_data; /** - * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per + * Performs any shader unit setup that only needs to happen once per shader (as opposed to once + * per * vertex, which would happen within the `Run` function). */ void Setup(); @@ -375,8 +379,8 @@ struct ShaderSetup { * @param setup Setup object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); - + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + const Regs::ShaderConfig& config, const ShaderSetup& setup); }; } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f6c86a759..681ff9728 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -40,7 +40,7 @@ struct CallStackElement { u32 loop_address; // The address where we'll return to after each loop iteration }; -template +template void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned offset) { // TODO: Is there a maximal size for this? boost::container::static_vector call_stack; @@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned } } - const Instruction instr = { program_code[program_counter] }; - const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; + const Instruction instr = {program_code[program_counter]}; + const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; - auto call = [&program_counter, &call_stack](UnitState& state, u32 offset, u32 num_instructions, - u32 return_offset, u8 repeat_count, u8 loop_increment) { - program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + auto call = [&program_counter, &call_stack](UnitState& state, u32 offset, + u32 num_instructions, u32 return_offset, + u8 repeat_count, u8 loop_increment) { + program_counter = + offset - + 1; // -1 to make sure when incrementing the PC we end up at the correct offset ASSERT(call_stack.size() < call_stack.capacity()); - call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); + call_stack.push_back( + {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); }; Record(state.debug, iteration, program_counter); if (iteration > 0) @@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned }; switch (instr.opcode.Value().GetInfo().type) { - case OpCode::Type::Arithmetic: - { - const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); + case OpCode::Type::Arithmetic: { + const bool is_inverted = + (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); - const int address_offset = (instr.common.address_register_index == 0) - ? 0 : state.address_registers[instr.common.address_register_index - 1]; + const int address_offset = + (instr.common.address_register_index == 0) + ? 0 + : state.address_registers[instr.common.address_register_index - 1]; - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + + (!is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + + (is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], + src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; if (negate_src1) { src1[0] = src1[0] * float24::FromFloat32(-1); @@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src1[3] = src1[3] * float24::FromFloat32(-1); } float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], + src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; if (negate_src2) { src2[0] = src2[0] * float24::FromFloat32(-1); @@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] - : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] - : dummy_vec4_float24; + float24* dest = + (instr.common.dest.Value() < 0x10) + ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] + : (instr.common.dest.Value() < 0x20) + ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] + : dummy_vec4_float24; - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); + state.debug.max_opdesc_id = + std::max(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); switch (instr.opcode.Value().EffectiveOpCode()) { - case OpCode::Id::ADD: - { + case OpCode::Id::ADD: { Record(state.debug, iteration, src1); Record(state.debug, iteration, src2); Record(state.debug, iteration, dest); @@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; } - case OpCode::Id::MUL: - { + case OpCode::Id::MUL: { Record(state.debug, iteration, src1); Record(state.debug, iteration, src2); Record(state.debug, iteration, dest); @@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned case OpCode::Id::DP3: case OpCode::Id::DP4: case OpCode::Id::DPH: - case OpCode::Id::DPHI: - { + case OpCode::Id::DPHI: { Record(state.debug, iteration, src1); Record(state.debug, iteration, src2); Record(state.debug, iteration, dest); @@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src1[3] = float24::FromFloat32(1.0f); int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; - float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); + float24 dot = std::inner_product(src1, src1 + num_components, src2, + float24::FromFloat32(0.f)); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned } // Reciprocal - case OpCode::Id::RCP: - { + case OpCode::Id::RCP: { Record(state.debug, iteration, src1); Record(state.debug, iteration, dest); float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); @@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned } // Reciprocal Square Root - case OpCode::Id::RSQ: - { + case OpCode::Id::RSQ: { Record(state.debug, iteration, src1); Record(state.debug, iteration, dest); float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); @@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; } - case OpCode::Id::MOVA: - { + case OpCode::Id::MOVA: { Record(state.debug, iteration, src1); for (int i = 0; i < 2; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned // TODO: Figure out how the rounding is done on hardware state.address_registers[i] = static_cast(src1[i].ToFloat32()); } - Record(state.debug, iteration, state.address_registers); + Record(state.debug, iteration, + state.address_registers); break; } - case OpCode::Id::MOV: - { + case OpCode::Id::MOV: { Record(state.debug, iteration, src1); Record(state.debug, iteration, dest); for (int i = 0; i < 4; ++i) { @@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) + : float24::FromFloat32(0.0f); } Record(state.debug, iteration, dest); break; @@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) + : float24::FromFloat32(0.0f); } Record(state.debug, iteration, dest); break; @@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); switch (op) { - case Instruction::Common::CompareOpType::Equal: - state.conditional_code[i] = (src1[i] == src2[i]); - break; + case Instruction::Common::CompareOpType::Equal: + state.conditional_code[i] = (src1[i] == src2[i]); + break; - case Instruction::Common::CompareOpType::NotEqual: - state.conditional_code[i] = (src1[i] != src2[i]); - break; + case Instruction::Common::CompareOpType::NotEqual: + state.conditional_code[i] = (src1[i] != src2[i]); + break; - case Instruction::Common::CompareOpType::LessThan: - state.conditional_code[i] = (src1[i] < src2[i]); - break; + case Instruction::Common::CompareOpType::LessThan: + state.conditional_code[i] = (src1[i] < src2[i]); + break; - case Instruction::Common::CompareOpType::LessEqual: - state.conditional_code[i] = (src1[i] <= src2[i]); - break; + case Instruction::Common::CompareOpType::LessEqual: + state.conditional_code[i] = (src1[i] <= src2[i]); + break; - case Instruction::Common::CompareOpType::GreaterThan: - state.conditional_code[i] = (src1[i] > src2[i]); - break; + case Instruction::Common::CompareOpType::GreaterThan: + state.conditional_code[i] = (src1[i] > src2[i]); + break; - case Instruction::Common::CompareOpType::GreaterEqual: - state.conditional_code[i] = (src1[i] >= src2[i]); - break; + case Instruction::Common::CompareOpType::GreaterEqual: + state.conditional_code[i] = (src1[i] >= src2[i]); + break; - default: - LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast(op)); - break; + default: + LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast(op)); + break; } } Record(state.debug, iteration, state.conditional_code); break; - case OpCode::Id::EX2: - { + case OpCode::Id::EX2: { Record(state.debug, iteration, src1); Record(state.debug, iteration, dest); @@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; } - case OpCode::Id::LG2: - { + case OpCode::Id::LG2: { Record(state.debug, iteration, src1); Record(state.debug, iteration, dest); @@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); DEBUG_ASSERT(false); break; } @@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; } - case OpCode::Type::MultiplyAdd: - { + case OpCode::Type::MultiplyAdd: { if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { - const SwizzlePattern& swizzle = *reinterpret_cast(&swizzle_data[instr.mad.operand_desc_id]); + const SwizzlePattern& swizzle = *reinterpret_cast( + &swizzle_data[instr.mad.operand_desc_id]); bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); - const int address_offset = (instr.mad.address_register_index == 0) - ? 0 : state.address_registers[instr.mad.address_register_index - 1]; + const int address_offset = + (instr.mad.address_register_index == 0) + ? 0 + : state.address_registers[instr.mad.address_register_index - 1]; const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); - const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + + (!is_inverted * address_offset)); + const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + + (is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); const bool negate_src3 = ((bool)swizzle.negate_src3 != false); float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], + src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; if (negate_src1) { src1[0] = src1[0] * float24::FromFloat32(-1); @@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src1[3] = src1[3] * float24::FromFloat32(-1); } float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], + src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; if (negate_src2) { src2[0] = src2[0] * float24::FromFloat32(-1); @@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src2[3] = src2[3] * float24::FromFloat32(-1); } float24 src3[4] = { - src3_[(int)swizzle.GetSelectorSrc3(0)], - src3_[(int)swizzle.GetSelectorSrc3(1)], - src3_[(int)swizzle.GetSelectorSrc3(2)], - src3_[(int)swizzle.GetSelectorSrc3(3)], + src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)], + src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)], }; if (negate_src3) { src3[0] = src3[0] * float24::FromFloat32(-1); @@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] - : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] - : dummy_vec4_float24; + float24* dest = + (instr.mad.dest.Value() < 0x10) + ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] + : (instr.mad.dest.Value() < 0x20) + ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] + : dummy_vec4_float24; Record(state.debug, iteration, src1); Record(state.debug, iteration, src2); @@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned Record(state.debug, iteration, dest); } else { LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); } break; } - default: - { - static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { - bool results[2] = { refx == state.conditional_code[0], - refy == state.conditional_code[1] }; + default: { + static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, + Instruction::FlowControlType flow_control) { + bool results[2] = {refx == state.conditional_code[0], + refy == state.conditional_code[1]}; switch (flow_control.op) { case flow_control.Or: @@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; case OpCode::Id::JMPC: - Record(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { + Record(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::JMPU: - Record(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); - if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { + if (uniforms.b[instr.flow_control.bool_uniform_id] == + !(instr.flow_control.num_instructions & 1)) { program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::CALL: - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, program_counter + 1, 0, 0); break; case OpCode::Id::CALLU: - Record(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + program_counter + 1, 0, 0); } break; case OpCode::Id::CALLC: - Record(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + Record(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + program_counter + 1, 0, 0); } break; @@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned break; case OpCode::Id::IFU: - Record(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); + Record( + state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state, - program_counter + 1, + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } else { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } break; - case OpCode::Id::IFC: - { + case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? - Record(state.debug, iteration, state.conditional_code); - if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - call(state, - program_counter + 1, + Record(state.debug, iteration, + state.conditional_code); + if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, + instr.flow_control)) { + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } else { - call(state, - instr.flow_control.dest_offset, - instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, + 0); } break; } - case OpCode::Id::LOOP: - { + case OpCode::Id::LOOP: { Math::Vec4 loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, uniforms.i[instr.flow_control.int_uniform_id].y, uniforms.i[instr.flow_control.int_uniform_id].z, @@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned state.address_registers[2] = loop_param.y; Record(state.debug, iteration, loop_param); - call(state, - program_counter + 1, + call(state, program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, - instr.flow_control.dest_offset + 1, - loop_param.x, - loop_param.z); + instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); break; } default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), + instr.opcode.Value().GetInfo().name, instr.hex); break; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index bb3ce1c6e..48ede0a2e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -8,9 +8,10 @@ namespace Pica { namespace Shader { -template struct UnitState; +template +struct UnitState; -template +template void RunInterpreter(const ShaderSetup& setup, UnitState& state, unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 43e7e6b4c..04e04ba1a 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -31,70 +31,70 @@ using namespace Gen; typedef void (JitShader::*JitFunction)(Instruction instr); const JitFunction instr_table[64] = { - &JitShader::Compile_ADD, // add - &JitShader::Compile_DP3, // dp3 - &JitShader::Compile_DP4, // dp4 - &JitShader::Compile_DPH, // dph - nullptr, // unknown - &JitShader::Compile_EX2, // ex2 - &JitShader::Compile_LG2, // lg2 - nullptr, // unknown - &JitShader::Compile_MUL, // mul - &JitShader::Compile_SGE, // sge - &JitShader::Compile_SLT, // slt - &JitShader::Compile_FLR, // flr - &JitShader::Compile_MAX, // max - &JitShader::Compile_MIN, // min - &JitShader::Compile_RCP, // rcp - &JitShader::Compile_RSQ, // rsq - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_MOVA, // mova - &JitShader::Compile_MOV, // mov - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_DPH, // dphi - nullptr, // unknown - &JitShader::Compile_SGE, // sgei - &JitShader::Compile_SLT, // slti - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - nullptr, // unknown - &JitShader::Compile_NOP, // nop - &JitShader::Compile_END, // end - nullptr, // break - &JitShader::Compile_CALL, // call - &JitShader::Compile_CALLC, // callc - &JitShader::Compile_CALLU, // callu - &JitShader::Compile_IF, // ifu - &JitShader::Compile_IF, // ifc - &JitShader::Compile_LOOP, // loop - nullptr, // emit - nullptr, // sete - &JitShader::Compile_JMP, // jmpc - &JitShader::Compile_JMP, // jmpu - &JitShader::Compile_CMP, // cmp - &JitShader::Compile_CMP, // cmp - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // madi - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad - &JitShader::Compile_MAD, // mad + &JitShader::Compile_ADD, // add + &JitShader::Compile_DP3, // dp3 + &JitShader::Compile_DP4, // dp4 + &JitShader::Compile_DPH, // dph + nullptr, // unknown + &JitShader::Compile_EX2, // ex2 + &JitShader::Compile_LG2, // lg2 + nullptr, // unknown + &JitShader::Compile_MUL, // mul + &JitShader::Compile_SGE, // sge + &JitShader::Compile_SLT, // slt + &JitShader::Compile_FLR, // flr + &JitShader::Compile_MAX, // max + &JitShader::Compile_MIN, // min + &JitShader::Compile_RCP, // rcp + &JitShader::Compile_RSQ, // rsq + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_MOVA, // mova + &JitShader::Compile_MOV, // mov + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_DPH, // dphi + nullptr, // unknown + &JitShader::Compile_SGE, // sgei + &JitShader::Compile_SLT, // slti + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + nullptr, // unknown + &JitShader::Compile_NOP, // nop + &JitShader::Compile_END, // end + nullptr, // break + &JitShader::Compile_CALL, // call + &JitShader::Compile_CALLC, // callc + &JitShader::Compile_CALLU, // callu + &JitShader::Compile_IF, // ifu + &JitShader::Compile_IF, // ifc + &JitShader::Compile_LOOP, // loop + nullptr, // emit + nullptr, // sete + &JitShader::Compile_JMP, // jmpc + &JitShader::Compile_JMP, // jmpu + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_CMP, // cmp + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // madi + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad + &JitShader::Compile_MAD, // mad }; // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can @@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = { - SETUP, STATE, // Pointers to register blocks + SETUP, STATE, // Pointers to register blocks ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers - ONE+16, NEGBIT+16, // Constants + ONE + 16, NEGBIT + 16, // Constants }; /// Raw constant for the source register selector that indicates no swizzling is performed @@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf; * @return Instruction at the specified offset */ static Instruction GetVertexShaderInstruction(size_t offset) { - return { g_state.vs.program_code[offset] }; + return {g_state.vs.program_code[offset]}; } static void LogCritical(const char* msg) { @@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) { * @param src_reg SourceRegister object corresponding to the source register to load * @param dest Destination XMM register to store the loaded, swizzled source register */ -void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { +void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, + X64Reg dest) { X64Reg src_ptr; size_t src_offset; @@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe unsigned operand_desc_id; - const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); + const bool is_inverted = + (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); unsigned address_register_index; unsigned offset_src; @@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); } - SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; // Generate instructions for source register swizzling as needed u8 sel = swiz.GetRawSelector(src_num); @@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe } // If the source register should be negated, flip the negative bit using XOR - const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; + const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3}; if (negate[src_num - 1]) { XORPS(dest, R(NEGBIT)); } } -void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { +void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) { DestRegister dest; unsigned operand_desc_id; if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || @@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { dest = instr.common.dest.Value(); } - SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; int dest_offset_disp = (int)UnitState::OutputOffset(dest); - ASSERT_MSG(dest_offset_disp == UnitState::OutputOffset(dest), "Destinaton offset too large for int type"); + ASSERT_MSG(dest_offset_disp == UnitState::OutputOffset(dest), + "Destinaton offset too large for int type"); // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { @@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { MOVAPS(MDisp(STATE, dest_offset_disp), src); } else { - // Not all components are enabled, so mask the result when storing to the destination register... + // Not all components are enabled, so mask the result when storing to the destination + // register... MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); if (Common::GetCPUCaps().sse4_1) { - u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); + u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | + ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); BLENDPS(SCRATCH, R(src), mask); } else { MOVAPS(SCRATCH2, R(src)); UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination - UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination + UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination - // Compute selector to selectively copy source components to destination for SHUFPS instruction + // Compute selector to selectively copy source components to destination for SHUFPS + // instruction u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | @@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); + int offset = + ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); } @@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) { } void JitShader::Compile_MOVA(Instruction instr) { - SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; + SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { return; // NoOp @@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) { // Call the subroutine FixupBranch b = CALL(); - fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + fixup_branches.push_back({b, instr.flow_control.dest_offset}); // Skip over the return offset that's on the stack ADD(64, R(RSP), Imm32(8)); @@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) { // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here // because they don't match when used with NaNs. - static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; + static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE}; bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; @@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) { } void JitShader::Compile_IF(Instruction instr) { - Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, + "Backwards if-statements not supported"); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) { } void JitShader::Compile_LOOP(Instruction instr) { - Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); + Compile_Assert(instr.flow_control.dest_offset >= program_counter, + "Backwards loops not supported"); Compile_Assert(!looping, "Nested loops not supported"); looping = true; - int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); + int offset = + ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); SHR(32, R(LOOPCOUNT_REG), Imm8(8)); AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start MOV(32, R(LOOPINC), R(LOOPCOUNT)); SHR(32, R(LOOPINC), Imm8(16)); - MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer + MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count - ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 + ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 auto loop_start = GetCodePtr(); Compile_Block(instr.flow_control.dest_offset + 1); ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component - SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 - J_CC(CC_NZ, loop_start); // Loop if not equal + SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 + J_CC(CC_NZ, loop_start); // Loop if not equal looping = false; } @@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) { else UNREACHABLE(); - bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && - (instr.flow_control.num_instructions & 1); + bool inverted_condition = + (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1); FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); - fixup_branches.push_back({ b, instr.flow_control.dest_offset }); + fixup_branches.push_back({b, instr.flow_control.dest_offset}); } void JitShader::Compile_Block(unsigned end) { @@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() { Compile_Return(); } - ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); + ASSERT_MSG(code_ptr[program_counter] == nullptr, + "Tried to compile already compiled shader location!"); code_ptr[program_counter] = GetCodePtr(); Instruction instr = GetVertexShaderInstruction(program_counter++); @@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() { } else { // Unhandled instruction LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", - instr.opcode.Value().EffectiveOpCode(), instr.hex); + instr.opcode.Value().EffectiveOpCode(), instr.hex); } } @@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() { case OpCode::Id::CALL: case OpCode::Id::CALLC: case OpCode::Id::CALLU: - return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); + return_offsets.push_back(instr.flow_control.dest_offset + + instr.flow_control.num_instructions); break; default: break; @@ -835,12 +847,12 @@ void JitShader::Compile() { XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); // Used to set a register to one - static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; + static const __m128 one = {1.f, 1.f, 1.f, 1.f}; MOV(PTRBITS, R(RAX), ImmPtr(&one)); MOVAPS(ONE, MatR(RAX)); // Used to negate registers - static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; + static const __m128 neg = {-0.f, -0.f, -0.f, -0.f}; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); MOVAPS(NEGBIT, MatR(RAX)); @@ -850,7 +862,8 @@ void JitShader::Compile() { // Compile entire program Compile_Block(static_cast(g_state.vs.program_code.size())); - // Set the target for any incomplete branches now that the entire shader program has been emitted + // Set the target for any incomplete branches now that the entire shader program has been + // emitted for (const auto& branch : fixup_branches) { SetJumpTarget(branch.first, code_ptr[branch.second]); } @@ -861,7 +874,8 @@ void JitShader::Compile() { fixup_branches.clear(); fixup_branches.shrink_to_fit(); - uintptr_t size = reinterpret_cast(GetCodePtr()) - reinterpret_cast(program); + uintptr_t size = + reinterpret_cast(GetCodePtr()) - reinterpret_cast(program); ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5468459d4..2f37ef8bf 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -70,11 +70,11 @@ public: void Compile_MAD(Instruction instr); private: - void Compile_Block(unsigned end); void Compile_NextInstr(); - void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); + void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, + Gen::X64Reg dest); void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); /** @@ -111,8 +111,8 @@ private: /// Offsets in code where a return needs to be inserted std::vector return_offsets; - unsigned program_counter = 0; ///< Offset of the next instruction to decode - bool looping = false; ///< True if compiling a loop, used to check for nested loops + unsigned program_counter = 0; ///< Offset of the next instruction to decode + bool looping = false; ///< True if compiling a loop, used to check for nested loops /// Branches that need to be fixed up once the entire shader program is compiled std::vector> fixup_branches; -- cgit v1.2.3