From f37e39deb9abe88b4874ebc2889ed52e02ed9c13 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 14:30:38 +0200 Subject: Pica: Add debug utilities for dumping shaders. --- src/video_core/vertex_shader.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96a..8df14b51f 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -4,6 +4,7 @@ #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" #include #include @@ -50,6 +51,11 @@ struct VertexShaderState { }; u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + + struct { + u32 max_offset; // maximum program counter ever reached + u32 max_opdesc_id; // maximum swizzle pattern index ever used + } debug; }; static void ProcessShaderCode(VertexShaderState& state) { @@ -57,6 +63,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x @@ -88,6 +95,7 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode) { case Instruction::OpCode::ADD: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -100,6 +108,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -113,6 +122,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) @@ -130,6 +140,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -145,6 +156,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -159,6 +171,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -212,6 +225,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) const u32* main = &shader_memory[registers.vs_main_offset]; state.program_counter = (u32*)main; + state.debug.max_offset = 0; + state.debug.max_opdesc_id = 0; // Setup input register table const auto& attribute_register_map = registers.vs_input_register_map; @@ -255,6 +270,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); + DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + state.debug.max_opdesc_id, registers.vs_main_offset, + registers.vs_output_attributes); DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), -- cgit v1.2.3 From 62c36a4ef0a37fe83bb8f8680f928970bead545b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 14 Aug 2014 23:28:55 +0200 Subject: Pica/VertexShader: Fix a bug in the bitfield definitions and add the "negate" field for swizzlers. --- src/video_core/vertex_shader.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 8df14b51f..cdecbff3c 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -65,26 +65,32 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] - : &state.temporary_registers[instr.common.src2-0x10].x; - // TODO: Unsure about the limit values - float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] - : (instr.common.dest <= 0x3C) ? nullptr - : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] + const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] + : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x + : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x : nullptr; + const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] + : &state.temporary_registers[instr.common.src2.GetIndex()].x; + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + const bool negate_src1 = swizzle.negate; - const float24 src1[4] = { + float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); + } const float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], -- cgit v1.2.3 From 697b50c3ff1b0d179c5862efa56cb19fa640824c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 23 Aug 2014 14:14:34 +0200 Subject: Pica/VertexShader: Fix a bug in the call stack handling. --- src/video_core/vertex_shader.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index cdecbff3c..db8244317 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -191,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) { if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { - state.program_counter = &shader_memory[*state.call_stack_pointer--]; - *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; + // Jump back to call stack position, invalidate call stack entry, move up call stack pointer + state.program_counter = &shader_memory[*state.call_stack_pointer]; + *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; } break; -- cgit v1.2.3