summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/CMakeLists.txt2
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.cpp2
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp6
-rw-r--r--src/shader_recompiler/host_translate_info.h3
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp4
-rw-r--r--src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp44
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp185
-rw-r--r--src/shader_recompiler/ir_opt/passes.h2
-rw-r--r--src/shader_recompiler/runtime_info.h3
-rw-r--r--src/shader_recompiler/shader_info.h1
13 files changed, 253 insertions, 5 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 525b2363c..07e75f9d8 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate_program.h
host_translate_info.h
ir_opt/collect_shader_info_pass.cpp
+ ir_opt/conditional_barrier_pass.cpp
ir_opt/constant_propagation_pass.cpp
ir_opt/dead_code_elimination_pass.cpp
ir_opt/dual_vertex_pass.cpp
@@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC
ir_opt/identity_removal_pass.cpp
ir_opt/layer_pass.cpp
ir_opt/lower_fp16_to_fp32.cpp
+ ir_opt/lower_fp64_to_fp32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
ir_opt/position_pass.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index fd4a61a4d..b795c0179 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -461,7 +461,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I
header += fmt::format("R{},", index);
}
if (program.local_memory_size > 0) {
- header += fmt::format("lmem[{}],", program.local_memory_size);
+ header += fmt::format("lmem[{}],", Common::DivCeil(program.local_memory_size, 4U));
}
if (program.info.uses_fswzadd) {
header += "FSWZA[4],FSWZB[4],";
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index c3c2281bb..9ff4028c2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_index + element};
- if (xfb_varying_index < runtime_info.xfb_varyings.size()) {
+ if (xfb_varying_index < runtime_info.xfb_count) {
xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0f86a8004..34592a01f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
}
void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
- if (ctx.runtime_info.xfb_varyings.empty()) {
+ if (ctx.runtime_info.xfb_count == 0) {
return;
}
ctx.AddCapability(spv::Capability::TransformFeedback);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index fd15f47ea..bec5db173 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_attr_index + element};
- if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) {
+ if (xfb_varying_index < ctx.runtime_info.xfb_count) {
xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
}
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 17a6d4888..928b35561 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite
+ if (!host_info.support_float64) {
+ Optimization::LowerFp64ToFp32(program);
+ }
if (!host_info.support_float16) {
Optimization::LowerFp16ToFp32(program);
}
if (!host_info.support_int64) {
Optimization::LowerInt64ToInt32(program);
}
+ if (!host_info.support_conditional_barrier) {
+ Optimization::ConditionalBarrierPass(program);
+ }
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(env, program);
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 2aaa6c5ea..7d2ded907 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -10,6 +10,7 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
+ bool support_float64{}; ///< True when the device supports 64-bit floats
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
@@ -17,6 +18,8 @@ struct HostTranslateInfo {
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
+ bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
+ ///< control flow
};
} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 5a4195217..70292686f 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -424,6 +424,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
break;
+ case IR::Opcode::LoadLocal:
+ case IR::Opcode::WriteLocal:
+ info.uses_local_memory = true;
+ break;
default:
break;
}
diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
new file mode 100644
index 000000000..c3ed27f4f
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void ConditionalBarrierPass(IR::Program& program) {
+ s32 conditional_control_flow_count{0};
+ s32 conditional_return_count{0};
+ for (IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::If:
+ case IR::AbstractSyntaxNode::Type::Loop:
+ conditional_control_flow_count++;
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ conditional_control_flow_count--;
+ break;
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ case IR::AbstractSyntaxNode::Type::Return:
+ if (conditional_control_flow_count > 0) {
+ conditional_return_count++;
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ if ((conditional_control_flow_count > 0 || conditional_return_count > 0) &&
+ inst.GetOpcode() == IR::Opcode::Barrier) {
+ LOG_WARNING(Shader, "Barrier within conditional control flow");
+ inst.ReplaceOpcode(IR::Opcode::Identity);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ ASSERT(conditional_control_flow_count == 0);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
new file mode 100644
index 000000000..5db7a38ad
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+
+constexpr s32 F64ToF32Exp = +1023 - 127;
+constexpr s32 F32ToF64Exp = +127 - 1023;
+
+IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
+ const IR::U32 lo{ir.CompositeExtract(packed, 0)};
+ const IR::U32 hi{ir.CompositeExtract(packed, 1)};
+ const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))};
+ const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))};
+ const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))};
+ const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))};
+ const IR::U32 mantissa{
+ ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)};
+ const IR::U32 exp_if_subnorm{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))};
+ const IR::U32 exp_if_infnan{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)};
+ const IR::U32 result{
+ ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+ ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))};
+ return ir.BitCast<IR::F32>(result);
+}
+
+IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
+ const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))};
+ const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))};
+ const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))};
+ const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))};
+ const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))};
+ const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))};
+ const IR::U32 exp_if_subnorm{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))};
+ const IR::U32 exp_if_infnan{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)};
+ const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))};
+ const IR::U32 hi{
+ ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+ ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))};
+ return ir.CompositeConstruct(lo, hi);
+}
+
+IR::Opcode Replace(IR::Opcode op) {
+ switch (op) {
+ case IR::Opcode::FPAbs64:
+ return IR::Opcode::FPAbs32;
+ case IR::Opcode::FPAdd64:
+ return IR::Opcode::FPAdd32;
+ case IR::Opcode::FPCeil64:
+ return IR::Opcode::FPCeil32;
+ case IR::Opcode::FPFloor64:
+ return IR::Opcode::FPFloor32;
+ case IR::Opcode::FPFma64:
+ return IR::Opcode::FPFma32;
+ case IR::Opcode::FPMul64:
+ return IR::Opcode::FPMul32;
+ case IR::Opcode::FPNeg64:
+ return IR::Opcode::FPNeg32;
+ case IR::Opcode::FPRoundEven64:
+ return IR::Opcode::FPRoundEven32;
+ case IR::Opcode::FPSaturate64:
+ return IR::Opcode::FPSaturate32;
+ case IR::Opcode::FPClamp64:
+ return IR::Opcode::FPClamp32;
+ case IR::Opcode::FPTrunc64:
+ return IR::Opcode::FPTrunc32;
+ case IR::Opcode::CompositeConstructF64x2:
+ return IR::Opcode::CompositeConstructF32x2;
+ case IR::Opcode::CompositeConstructF64x3:
+ return IR::Opcode::CompositeConstructF32x3;
+ case IR::Opcode::CompositeConstructF64x4:
+ return IR::Opcode::CompositeConstructF32x4;
+ case IR::Opcode::CompositeExtractF64x2:
+ return IR::Opcode::CompositeExtractF32x2;
+ case IR::Opcode::CompositeExtractF64x3:
+ return IR::Opcode::CompositeExtractF32x3;
+ case IR::Opcode::CompositeExtractF64x4:
+ return IR::Opcode::CompositeExtractF32x4;
+ case IR::Opcode::CompositeInsertF64x2:
+ return IR::Opcode::CompositeInsertF32x2;
+ case IR::Opcode::CompositeInsertF64x3:
+ return IR::Opcode::CompositeInsertF32x3;
+ case IR::Opcode::CompositeInsertF64x4:
+ return IR::Opcode::CompositeInsertF32x4;
+ case IR::Opcode::FPOrdEqual64:
+ return IR::Opcode::FPOrdEqual32;
+ case IR::Opcode::FPUnordEqual64:
+ return IR::Opcode::FPUnordEqual32;
+ case IR::Opcode::FPOrdNotEqual64:
+ return IR::Opcode::FPOrdNotEqual32;
+ case IR::Opcode::FPUnordNotEqual64:
+ return IR::Opcode::FPUnordNotEqual32;
+ case IR::Opcode::FPOrdLessThan64:
+ return IR::Opcode::FPOrdLessThan32;
+ case IR::Opcode::FPUnordLessThan64:
+ return IR::Opcode::FPUnordLessThan32;
+ case IR::Opcode::FPOrdGreaterThan64:
+ return IR::Opcode::FPOrdGreaterThan32;
+ case IR::Opcode::FPUnordGreaterThan64:
+ return IR::Opcode::FPUnordGreaterThan32;
+ case IR::Opcode::FPOrdLessThanEqual64:
+ return IR::Opcode::FPOrdLessThanEqual32;
+ case IR::Opcode::FPUnordLessThanEqual64:
+ return IR::Opcode::FPUnordLessThanEqual32;
+ case IR::Opcode::FPOrdGreaterThanEqual64:
+ return IR::Opcode::FPOrdGreaterThanEqual32;
+ case IR::Opcode::FPUnordGreaterThanEqual64:
+ return IR::Opcode::FPUnordGreaterThanEqual32;
+ case IR::Opcode::FPIsNan64:
+ return IR::Opcode::FPIsNan32;
+ case IR::Opcode::ConvertS16F64:
+ return IR::Opcode::ConvertS16F32;
+ case IR::Opcode::ConvertS32F64:
+ return IR::Opcode::ConvertS32F32;
+ case IR::Opcode::ConvertS64F64:
+ return IR::Opcode::ConvertS64F32;
+ case IR::Opcode::ConvertU16F64:
+ return IR::Opcode::ConvertU16F32;
+ case IR::Opcode::ConvertU32F64:
+ return IR::Opcode::ConvertU32F32;
+ case IR::Opcode::ConvertU64F64:
+ return IR::Opcode::ConvertU64F32;
+ case IR::Opcode::ConvertF32F64:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF64F32:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF64S8:
+ return IR::Opcode::ConvertF32S8;
+ case IR::Opcode::ConvertF64S16:
+ return IR::Opcode::ConvertF32S16;
+ case IR::Opcode::ConvertF64S32:
+ return IR::Opcode::ConvertF32S32;
+ case IR::Opcode::ConvertF64S64:
+ return IR::Opcode::ConvertF32S64;
+ case IR::Opcode::ConvertF64U8:
+ return IR::Opcode::ConvertF32U8;
+ case IR::Opcode::ConvertF64U16:
+ return IR::Opcode::ConvertF32U16;
+ case IR::Opcode::ConvertF64U32:
+ return IR::Opcode::ConvertF32U32;
+ case IR::Opcode::ConvertF64U64:
+ return IR::Opcode::ConvertF32U64;
+ default:
+ return op;
+ }
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::PackDouble2x32: {
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
+ break;
+ }
+ case IR::Opcode::UnpackDouble2x32: {
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
+ break;
+ }
+ default:
+ inst.ReplaceOpcode(Replace(inst.GetOpcode()));
+ break;
+ }
+}
+
+} // Anonymous namespace
+
+void LowerFp64ToFp32(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ Lower(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..629d18fa1 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,10 +13,12 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConditionalBarrierPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
+void LowerFp64ToFp32(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 3b63c249f..619c0b138 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -84,7 +84,8 @@ struct RuntimeInfo {
bool glasm_use_storage_buffers{};
/// Transform feedback state for each varying
- std::vector<TransformFeedbackVarying> xfb_varyings;
+ std::array<TransformFeedbackVarying, 256> xfb_varyings{};
+ u32 xfb_count{0};
};
} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index d308db942..b4b4afd37 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -172,6 +172,7 @@ struct Info {
bool stores_indexed_attributes{};
bool stores_global_memory{};
+ bool uses_local_memory{};
bool uses_fp16{};
bool uses_fp64{};