summaryrefslogtreecommitdiffstats
path: root/src/video_core/vertex_shader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
-rw-r--r--src/video_core/vertex_shader.cpp51
1 files changed, 38 insertions, 13 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 93830a96a..db8244317 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -4,6 +4,7 @@
#include "pica.h"
#include "vertex_shader.h"
+#include "debug_utils/debug_utils.h"
#include <core/mem_map.h>
#include <common/file_util.h>
@@ -50,6 +51,11 @@ struct VertexShaderState {
};
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
u32* call_stack_pointer;
+
+ struct {
+ u32 max_offset; // maximum program counter ever reached
+ u32 max_opdesc_id; // maximum swizzle pattern index ever used
+ } debug;
};
static void ProcessShaderCode(VertexShaderState& state) {
@@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) {
bool increment_pc = true;
bool exit_loop = false;
const Instruction& instr = *(const Instruction*)state.program_counter;
+ state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
- const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
- : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
- : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
- : nullptr;
- const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
- : &state.temporary_registers[instr.common.src2-0x10].x;
- // TODO: Unsure about the limit values
- float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
- : (instr.common.dest <= 0x3C) ? nullptr
- : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
+ const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
+ : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
+ : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
: nullptr;
+ const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
+ : &state.temporary_registers[instr.common.src2.GetIndex()].x;
+ float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
+ : (instr.common.dest < 0x10) ? nullptr
+ : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
+ : nullptr;
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
+ const bool negate_src1 = swizzle.negate;
- const float24 src1[4] = {
+ float24 src1[4] = {
src1_[(int)swizzle.GetSelectorSrc1(0)],
src1_[(int)swizzle.GetSelectorSrc1(1)],
src1_[(int)swizzle.GetSelectorSrc1(2)],
src1_[(int)swizzle.GetSelectorSrc1(3)],
};
+ if (negate_src1) {
+ src1[0] = src1[0] * float24::FromFloat32(-1);
+ src1[1] = src1[1] * float24::FromFloat32(-1);
+ src1[2] = src1[2] * float24::FromFloat32(-1);
+ src1[3] = src1[3] * float24::FromFloat32(-1);
+ }
const float24 src2[4] = {
src2_[(int)swizzle.GetSelectorSrc2(0)],
src2_[(int)swizzle.GetSelectorSrc2(1)],
@@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
switch (instr.opcode) {
case Instruction::OpCode::ADD:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCode::MUL:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCode::DP3:
case Instruction::OpCode::DP4:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
float24 dot = float24::FromFloat32(0.f);
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i)
@@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
// Reciprocal
case Instruction::OpCode::RCP:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
// Reciprocal Square Root
case Instruction::OpCode::RSQ:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCode::MOV:
{
+ state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
exit_loop = true;
} else {
- state.program_counter = &shader_memory[*state.call_stack_pointer--];
- *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
+ // Jump back to call stack position, invalidate call stack entry, move up call stack pointer
+ state.program_counter = &shader_memory[*state.call_stack_pointer];
+ *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
}
break;
@@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
const u32* main = &shader_memory[registers.vs_main_offset];
state.program_counter = (u32*)main;
+ state.debug.max_offset = 0;
+ state.debug.max_opdesc_id = 0;
// Setup input register table
const auto& attribute_register_map = registers.vs_input_register_map;
@@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
state.call_stack_pointer = &state.call_stack[0];
ProcessShaderCode(state);
+ DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data,
+ state.debug.max_opdesc_id, registers.vs_main_offset,
+ registers.vs_output_attributes);
DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),