summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader_interpreter.cpp36
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp52
-rw-r--r--src/video_core/shader/shader_jit_x64.h5
3 files changed, 91 insertions, 2 deletions
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index e14de0768..646171a19 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -334,6 +334,42 @@ void RunInterpreter(UnitState<Debug>& state) {
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
break;
+ case OpCode::Id::EX2: // Base-2 exponential of src1[0], stored to each enabled dest component
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); // record the source operand for this iteration
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); // record dest as it is before the write
+
+ // EX2 reads only the first source component: exp2 is evaluated once in 32-bit float, converted back to float24, and that single value is written to all enabled dest components
+ float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i)) // components not enabled by the swizzle are not written here
+ continue;
+
+ dest[i] = ex2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); // record dest as it is after the write
+ break;
+ }
+
+ case OpCode::Id::LG2: // Base-2 logarithm of src1[0], stored to each enabled dest component
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); // record the source operand for this iteration
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); // record dest as it is before the write
+
+ // LG2 reads only the first source component: log2 is evaluated once in 32-bit float, converted back to float24, and that single value is written to all enabled dest components. No special handling of zero or negative input is done here; whatever std::log2 returns is converted and stored
+ float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i)) // components not enabled by the swizzle are not written here
+ continue;
+
+ dest[i] = lg2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); // record dest as it is after the write
+ break;
+ }
+
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 836942c6b..e4b8295b3 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -25,8 +25,8 @@ const JitFunction instr_table[64] = {
&JitCompiler::Compile_DP4, // dp4
nullptr, // dph
nullptr, // unknown
- nullptr, // ex2
- nullptr, // lg2
+ &JitCompiler::Compile_EX2, // ex2
+ &JitCompiler::Compile_LG2, // lg2
nullptr, // unknown
&JitCompiler::Compile_MUL, // mul
nullptr, // lge
@@ -280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
}
+void JitCompiler::Compile_PushCallerSavedXMM() { // Emits code that spills ONE and NEGBIT to the stack; emits nothing when _WIN32 is defined. Must be paired with Compile_PopCallerSavedXMM
+#ifndef _WIN32 // NOTE(review): presumably skipped on Windows because the registers behind ONE/NEGBIT are callee-saved in that ABI - confirm
+ SUB(64, R(RSP), Imm8(2 * 16)); // reserve 32 bytes of stack: two 16-byte slots
+ MOVUPS(MDisp(RSP, 16), ONE); // store ONE at RSP+16; unaligned move, so no 16-byte alignment of RSP is required
+ MOVUPS(MDisp(RSP, 0), NEGBIT); // store NEGBIT at RSP+0
+#endif
+}
+
+void JitCompiler::Compile_PopCallerSavedXMM() { // Emits code that reloads NEGBIT and ONE from the stack and releases the 32 bytes; exact mirror of Compile_PushCallerSavedXMM, and likewise emits nothing when _WIN32 is defined
+#ifndef _WIN32
+ MOVUPS(NEGBIT, MDisp(RSP, 0)); // reload NEGBIT from RSP+0 (same slot the push routine used)
+ MOVUPS(ONE, MDisp(RSP, 16)); // reload ONE from RSP+16
+ ADD(64, R(RSP), Imm8(2 * 16)); // release the two 16-byte slots
+#endif
+}
+
void JitCompiler::Compile_ADD(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -331,6 +347,38 @@ void JitCompiler::Compile_DP4(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
+void JitCompiler::Compile_EX2(Instruction instr) { // Emits code for the EX2 shader instruction: calls exp2f on the first swizzled src1 component and hands the broadcast result to Compile_DestEnable
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); // swizzled source operand 1 goes into SRC1
+ MOVSS(XMM0, R(SRC1)); // low float of SRC1 -> XMM0 (presumably the first float argument register for the call below - confirm against the target ABI)
+
+ // The following will actually break the stack alignment (original caveat, left unresolved in this code). The register save/restore calls must stay in this exact nesting order around the call
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM(); // additionally preserve ONE and NEGBIT across the call (non-Windows builds only)
+ ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); // host C library exp2f does the actual math
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); // replicate lane 0 of XMM0 (presumably the exp2f return value) into all four lanes
+ MOVAPS(SRC1, R(XMM0)); // move the broadcast value into SRC1, the register passed to Compile_DestEnable
+ Compile_DestEnable(instr, SRC1); // NOTE(review): presumably stores SRC1 to the destination honoring the dest component mask - confirm in Compile_DestEnable
+}
+
+void JitCompiler::Compile_LG2(Instruction instr) { // Emits code for the LG2 shader instruction: calls log2f on the first swizzled src1 component and hands the broadcast result to Compile_DestEnable
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); // swizzled source operand 1 goes into SRC1
+ MOVSS(XMM0, R(SRC1)); // low float of SRC1 -> XMM0 (presumably the first float argument register for the call below - confirm against the target ABI)
+
+ // The following will actually break the stack alignment (original caveat, left unresolved in this code). The register save/restore calls must stay in this exact nesting order around the call
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM(); // additionally preserve ONE and NEGBIT across the call (non-Windows builds only)
+ ABI_CallFunction(reinterpret_cast<const void*>(log2f)); // host C library log2f does the actual math
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); // replicate lane 0 of XMM0 (presumably the log2f return value) into all four lanes
+ MOVAPS(SRC1, R(XMM0)); // move the broadcast value into SRC1, the register passed to Compile_DestEnable
+ Compile_DestEnable(instr, SRC1); // NOTE(review): presumably stores SRC1 to the destination honoring the dest component mask - confirm in Compile_DestEnable
+}
+
void JitCompiler::Compile_MUL(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index b88f2a0d2..a6ae7fbf1 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -37,6 +37,8 @@ public:
void Compile_ADD(Instruction instr);
void Compile_DP3(Instruction instr);
void Compile_DP4(Instruction instr);
+ void Compile_EX2(Instruction instr);
+ void Compile_LG2(Instruction instr);
void Compile_MUL(Instruction instr);
void Compile_FLR(Instruction instr);
void Compile_MAX(Instruction instr);
@@ -67,6 +69,9 @@ private:
void Compile_EvaluateCondition(Instruction instr);
void Compile_UniformCondition(Instruction instr);
+ void Compile_PushCallerSavedXMM();
+ void Compile_PopCallerSavedXMM();
+
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
unsigned* offset_ptr = nullptr;