summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- src/video_core/shader/shader.cpp 41
-rw-r--r-- src/video_core/shader/shader.h 49
-rw-r--r-- src/video_core/shader/shader_interpreter.cpp 16
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.cpp 49
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.h 2
5 files changed, 153 insertions, 4 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 67ed19ba8..e9063e616 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -21,7 +21,8 @@ namespace Pica {
namespace Shader {
-OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) {
+OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
+ const AttributeBuffer& input) {
// Setup output data
union {
OutputVertex ret{};
@@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
}
}
+/// Stores the (possibly null) emitter pointer. The declaration defaults the argument to nullptr;
+/// the interpreter and JIT treat a null emitter_ptr as "EMIT/SETEMIT executed on VS".
+UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {}
+
+/// Allocates the heap-side Handlers object that holds the two callbacks; the destructor frees it.
+GSEmitter::GSEmitter() : handlers(new Handlers) {}
+
+/// Releases the Handlers object allocated by the constructor.
+GSEmitter::~GSEmitter() {
+ delete handlers;
+}
+
+/**
+ * Copies the 16 given registers into slot `vertex_id` of the vertex buffer. When `prim_emit` is
+ * set, additionally calls the winding setter (if `winding` is set) and then passes every buffered
+ * vertex, reduced to the registers selected by `output_mask`, to the vertex handler.
+ */
+void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) {
+    ASSERT(vertex_id < 3);
+    std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin());
+
+    // Without prim_emit the vertex is only buffered; nothing is handed to the handlers.
+    if (!prim_emit) {
+        return;
+    }
+
+    if (winding) {
+        handlers->winding_setter();
+    }
+
+    for (const auto& stored_vertex : buffer) {
+        AttributeBuffer packed;
+        unsigned int slot = 0;
+        // Compact the registers enabled in output_mask into consecutive attribute slots.
+        for (unsigned int reg : Common::BitSet<u32>(output_mask)) {
+            packed.attr[slot] = stored_vertex[reg];
+            ++slot;
+        }
+        handlers->vertex_handler(packed);
+    }
+}
+
+/// Points the base UnitState's emitter_ptr at this unit's own `emitter` member. Only the member's
+/// address is taken here; `emitter` itself is constructed after the UnitState base.
+GSUnitState::GSUnitState() : UnitState(&emitter) {}
+
+/// Installs the two callbacks that GSEmitter::Emit invokes: `winding_setter` before a primitive
+/// whose winding flag is set, and `vertex_handler` once per buffered vertex.
+void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) {
+    auto& callbacks = *emitter.handlers;
+    callbacks.winding_setter = std::move(winding_setter);
+    callbacks.vertex_handler = std::move(vertex_handler);
+}
+
+/// Copies the output register mask from the shader configuration into the emitter, where Emit()
+/// uses it to select which buffered registers are forwarded to the vertex handler.
+void GSUnitState::ConfigOutput(const ShaderRegs& config) {
+ emitter.output_mask = config.output_mask;
+}
+
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
#ifdef ARCHITECTURE_x86_64
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index e156f6aef..a3789da01 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
+#include <functional>
#include <type_traits>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
@@ -31,6 +32,12 @@ struct AttributeBuffer {
alignas(16) Math::Vec4<float24> attr[16];
};
+/// Handler type for receiving vertex outputs from vertex shader or geometry shader
+using VertexHandler = std::function<void(const AttributeBuffer&)>;
+
+/// Handler type for signaling to invert the vertex order of the next triangle
+using WindingSetter = std::function<void()>;
+
struct OutputVertex {
Math::Vec4<float24> pos;
Math::Vec4<float24> quat;
@@ -43,7 +50,8 @@ struct OutputVertex {
INSERT_PADDING_WORDS(1);
Math::Vec2<float24> tc2;
- static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output);
+ static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
+ const AttributeBuffer& output);
};
#define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
@@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
/**
+ * This structure contains state information for primitive emitting in geometry shader.
+ */
+struct GSEmitter {
+    /// Staging area for three vertices of 16 registers each; Emit() writes slot `vertex_id`.
+    std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer;
+
+    // The fields below are written by SETEMIT / ConfigOutput and read by Emit(). They are given
+    // defined defaults so that an EMIT executed before any SETEMIT does not read indeterminate
+    // values. Default member initializers do not affect the standard-layout property.
+    /// Buffer slot that the next Emit() fills; Emit() asserts that it is less than 3.
+    u8 vertex_id = 0;
+    /// When set, Emit() forwards all buffered vertices to the vertex handler.
+    bool prim_emit = false;
+    /// When set (together with prim_emit), Emit() calls the winding setter first.
+    bool winding = false;
+    /// Bit mask of the registers that Emit() copies into the outgoing AttributeBuffer.
+    u32 output_mask = 0;
+
+    // Function objects are hidden behind a raw pointer to make the structure standard layout type,
+    // for JIT to use offsetof to access other members.
+    struct Handlers {
+        VertexHandler vertex_handler;
+        WindingSetter winding_setter;
+    } * handlers;
+
+    GSEmitter();
+    ~GSEmitter();
+
+    // `handlers` is an owning raw pointer (allocated in the constructor, freed in the
+    // destructor). A member-wise copy would make two emitters delete the same Handlers object,
+    // so copying is disabled.
+    GSEmitter(const GSEmitter&) = delete;
+    GSEmitter& operator=(const GSEmitter&) = delete;
+
+    /// Buffers `vertex` in slot `vertex_id` and, if `prim_emit` is set, outputs the primitive.
+    void Emit(Math::Vec4<float24> (&vertex)[16]);
+};
+static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");
+
+/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a
* single shader unit that processes all shaders serially. Putting the state information in a struct
* here will make it easier for us to parallelize the shader processing later.
*/
struct UnitState {
+ explicit UnitState(GSEmitter* emitter = nullptr);
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
@@ -82,6 +114,8 @@ struct UnitState {
// TODO: How many bits do these actually have?
s32 address_registers[3];
+ GSEmitter* emitter_ptr;
+
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
@@ -125,6 +159,19 @@ struct UnitState {
void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
};
+/**
+ * This is an extended shader unit state that represents the special unit that can run both vertex
+ * shader and geometry shader. It contains an additional primitive emitter and utilities for
+ * geometry shader.
+ */
+struct GSUnitState : public UnitState {
+ /// Constructs the unit with UnitState::emitter_ptr pointing at the `emitter` member below.
+ GSUnitState();
+ /// Registers the callbacks that receive emitted vertices and winding-inversion signals.
+ void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter);
+ /// Copies config.output_mask into the emitter.
+ void ConfigOutput(const ShaderRegs& config);
+
+ // NOTE(review): emitter_ptr refers to this member and GSEmitter owns a raw heap pointer, so an
+ // implicit copy of GSUnitState would alias both — confirm no caller copies this struct.
+ GSEmitter emitter;
+};
+
struct ShaderSetup {
struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 206c0978a..9d4da4904 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
break;
}
+ case OpCode::Id::EMIT: {
+ GSEmitter* emitter = state.emitter_ptr;
+ ASSERT_MSG(emitter, "Execute EMIT on VS");
+ emitter->Emit(state.registers.output);
+ break;
+ }
+
+ case OpCode::Id::SETEMIT: {
+ GSEmitter* emitter = state.emitter_ptr;
+ ASSERT_MSG(emitter, "Execute SETEMIT on VS");
+ emitter->vertex_id = instr.setemit.vertex_id;
+ emitter->prim_emit = instr.setemit.prim_emit != 0;
+ emitter->winding = instr.setemit.winding != 0;
+ break;
+ }
+
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(),
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 42a57aab1..1b31623bd 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -75,8 +75,8 @@ const JitFunction instr_table[64] = {
&JitShader::Compile_IF, // ifu
&JitShader::Compile_IF, // ifc
&JitShader::Compile_LOOP, // loop
- nullptr, // emit
- nullptr, // sete
+ &JitShader::Compile_EMIT, // emit
+ &JitShader::Compile_SETE, // sete
&JitShader::Compile_JMP, // jmpc
&JitShader::Compile_JMP, // jmpu
&JitShader::Compile_CMP, // cmp
@@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) {
}
}
+/// Plain-function trampoline so that JIT-generated code can call GSEmitter::Emit through
+/// CallFarFunction, passing the emitter and a pointer to the 16 output registers.
+static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) {
+    auto& output_registers = *output;
+    emitter->Emit(output_registers);
+}
+
+/**
+ * Emits code for the EMIT instruction. The generated code loads UnitState::emitter_ptr; if it is
+ * null it calls LogCritical with "Execute EMIT on VS" and skips the emit, otherwise it calls the
+ * Emit trampoline with the emitter and the address of UnitState::registers.output.
+ */
+void JitShader::Compile_EMIT(Instruction instr) {
+ Label have_emitter, end;
+ // rax = state.emitter_ptr; branch to the real work only when it is non-null.
+ mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
+ test(rax, rax);
+ jnz(have_emitter);
+
+ // Null emitter: report the error through LogCritical, then jump past the emit call.
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS"));
+ CallFarFunction(*this, LogCritical);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ jmp(end);
+
+ L(have_emitter);
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ // First argument: the emitter pointer still held in rax.
+ mov(ABI_PARAM1, rax);
+ // Second argument: STATE + offsetof(UnitState, registers.output), i.e. the output registers.
+ mov(ABI_PARAM2, STATE);
+ add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)));
+ CallFarFunction(*this, Emit);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ L(end);
+}
+
+/**
+ * Emits code for the SETEMIT instruction. The generated code loads UnitState::emitter_ptr; if it
+ * is null it calls LogCritical with "Execute SETEMIT on VS", otherwise it stores the instruction's
+ * setemit fields (known at compile time) into the emitter's vertex_id, prim_emit and winding bytes.
+ */
+void JitShader::Compile_SETE(Instruction instr) {
+ Label have_emitter, end;
+ // rax = state.emitter_ptr; branch to the stores only when it is non-null.
+ mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
+ test(rax, rax);
+ jnz(have_emitter);
+
+ // Null emitter: report the error through LogCritical, then skip the stores.
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS"));
+ CallFarFunction(*this, LogCritical);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ jmp(end);
+
+ L(have_emitter);
+ // Byte-sized immediate stores. The interpreter normalizes prim_emit/winding with `!= 0`,
+ // while the raw field value is stored here — presumably single-bit fields, so equivalent;
+ // TODO confirm against the nihstro setemit layout.
+ mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id);
+ mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit);
+ mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding);
+ L(end);
+}
+
void JitShader::Compile_Block(unsigned end) {
while (program_counter < end) {
Compile_NextInstr();
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
index 31af0ca48..4aee56b1d 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.h
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -66,6 +66,8 @@ public:
void Compile_JMP(Instruction instr);
void Compile_CMP(Instruction instr);
void Compile_MAD(Instruction instr);
+ void Compile_EMIT(Instruction instr);
+ void Compile_SETE(Instruction instr);
private:
void Compile_Block(unsigned end);