path: root/src/shader_recompiler
Diffstat
-rw-r--r--  src/shader_recompiler/CMakeLists.txt                               |   2
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp          |  10
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp |  44
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp            |  17
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.cpp         |  61
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.h           |  16
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp       |   6
-rw-r--r--  src/shader_recompiler/host_translate_info.h                        |   3
-rw-r--r--  src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp          |  44
-rw-r--r--  src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp                | 185
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                              |   2
-rw-r--r--  src/shader_recompiler/profile.h                                    |   3
-rw-r--r--  src/shader_recompiler/runtime_info.h                               |   2
13 files changed, 345 insertions(+), 50 deletions(-)
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 525b2363c..07e75f9d8 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate_program.h
host_translate_info.h
ir_opt/collect_shader_info_pass.cpp
+ ir_opt/conditional_barrier_pass.cpp
ir_opt/constant_propagation_pass.cpp
ir_opt/dead_code_elimination_pass.cpp
ir_opt/dual_vertex_pass.cpp
@@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC
ir_opt/identity_removal_pass.cpp
ir_opt/layer_pass.cpp
ir_opt/lower_fp16_to_fp32.cpp
+ ir_opt/lower_fp64_to_fp32.cpp
ir_opt/lower_int64_to_int32.cpp
ir_opt/passes.h
ir_opt/position_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 4b3043b65..0ce73f289 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -69,6 +69,11 @@ Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+ if (!ctx.profile.support_descriptor_aliasing) {
+ LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+ return ctx.ConstantNull(ctx.U64);
+ }
+
if (ctx.profile.support_int64_atomics) {
const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
binding, offset, sizeof(u64))};
@@ -86,6 +91,11 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+ if (!ctx.profile.support_descriptor_aliasing) {
+ LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+ return ctx.ConstantNull(ctx.U32[2]);
+ }
+
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
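For reference, the fallback that StorageAtomicU32x2 performs amounts to a plain read-modify-write on two 32-bit words, with no atomicity guarantee. A minimal host-side sketch of that semantics (illustrative only; NonAtomicRmw is a hypothetical helper, not part of the patch):

    #include <cstdint>
    #include <functional>

    // Load two u32 words as one u64, apply the operation, store the words
    // back. Nothing here is atomic; concurrent invocations can race.
    uint64_t NonAtomicRmw(uint32_t words[2], const std::function<uint64_t(uint64_t)>& op) {
        const uint64_t old_value = words[0] | (uint64_t{words[1]} << 32);
        const uint64_t new_value = op(old_value);
        words[0] = static_cast<uint32_t>(new_value);
        words[1] = static_cast<uint32_t>(new_value >> 32);
        return old_value; // atomic ops conventionally return the previous value
    }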
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 07c2b7b8a..2868fc57d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -10,27 +10,6 @@
namespace Shader::Backend::SPIRV {
namespace {
-struct AttrInfo {
- Id pointer;
- Id id;
- bool needs_cast;
-};
-
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
- const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
- switch (type) {
- case AttributeType::Float:
- return AttrInfo{ctx.input_f32, ctx.F32[1], false};
- case AttributeType::UnsignedInt:
- return AttrInfo{ctx.input_u32, ctx.U32[1], true};
- case AttributeType::SignedInt:
- return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
- case AttributeType::Disabled:
- return std::nullopt;
- }
- throw InvalidArgument("Invalid attribute type {}", type);
-}
-
template <typename... Args>
Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
switch (ctx.stage) {
@@ -302,15 +281,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const u32 element{static_cast<u32>(attr) % 4};
if (IR::IsGeneric(attr)) {
const u32 index{IR::GenericAttributeIndex(attr)};
- const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
- if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+ const auto& generic{ctx.input_generics.at(index)};
+ if (!ValidId(generic.id)) {
// Attribute is disabled or varying component is not written
return ctx.Const(element == 3 ? 1.0f : 0.0f);
}
- const Id generic_id{ctx.input_generics.at(index)};
- const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
- const Id value{ctx.OpLoad(type->id, pointer)};
- return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+ const Id pointer{
+ AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))};
+ const Id value{ctx.OpLoad(generic.component_type, pointer)};
+ return [&ctx, generic, value]() {
+ switch (generic.load_op) {
+ case InputGenericLoadOp::Bitcast:
+ return ctx.OpBitcast(ctx.F32[1], value);
+ case InputGenericLoadOp::SToF:
+ return ctx.OpConvertSToF(ctx.F32[1], value);
+ case InputGenericLoadOp::UToF:
+ return ctx.OpConvertUToF(ctx.F32[1], value);
+ default:
+ return value;
+            }
+ }();
}
switch (attr) {
case IR::Attribute::PrimitiveId:
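The InputGenericLoadOp value attached to each input decides how a loaded 32-bit component becomes the f32 the IR expects: integer attributes are bitcast, while scaled attributes are numerically converted when the host lacks native scaled-format support. A standalone sketch of that dispatch (LoadComponent is a hypothetical helper mirroring the lambda above, not part of the patch):

    #include <cstdint>
    #include <cstring>

    enum class LoadOp { None, Bitcast, SToF, UToF };

    float LoadComponent(uint32_t raw, LoadOp op) {
        float as_float;
        switch (op) {
        case LoadOp::SToF: // scaled signed attribute: OpConvertSToF
            return static_cast<float>(static_cast<int32_t>(raw));
        case LoadOp::UToF: // scaled unsigned attribute: OpConvertUToF
            return static_cast<float>(raw);
        default: // None/Bitcast: reinterpret the raw bits (OpBitcast)
            std::memcpy(&as_float, &raw, sizeof(as_float));
            return as_float;
        }
    }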
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c5db19d09..77ff8c573 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) {
Id WarpExtract(EmitContext& ctx, Id value) {
const Id thread_id{GetThreadId(ctx)};
const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
- return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+ if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) {
+ const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))};
+ const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))};
+ const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))};
+ const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))};
+ const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)};
+ const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)};
+ const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)};
+ return c0_or_c1_or_c2_or_c3;
+ } else {
+ return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+ }
}
Id LoadMask(EmitContext& ctx, Id mask) {
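The workaround above avoids OpVectorExtractDynamic by testing the computed word index against each of the four constants, selecting the matching word or zero, and OR-ing the candidates together; exactly one select is nonzero, so the OR reduction yields the wanted word. A host-side model of the same branchless extract (ExtractMaskWord is a hypothetical name used here for illustration):

    #include <array>
    #include <cstdint>

    uint32_t ExtractMaskWord(const std::array<uint32_t, 4>& mask, uint32_t thread_id) {
        const uint32_t local_index = thread_id >> 5; // which 32-bit word of the 128-bit mask
        uint32_t result = 0;
        for (uint32_t i = 0; i < 4; ++i) {
            result |= (local_index == i) ? mask[i] : 0u; // OpSelect feeding OpBitwiseOr
        }
        return result;
    }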
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 47739794f..fd15f47ea 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -25,12 +25,6 @@ enum class Operation {
FPMax,
};
-struct AttrInfo {
- Id pointer;
- Id id;
- bool needs_cast;
-};
-
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
@@ -206,23 +200,37 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
return ctx.TypeVector(ctx.TypeInt(32, true), 4);
case AttributeType::UnsignedInt:
return ctx.U32[4];
+ case AttributeType::SignedScaled:
+ return ctx.profile.support_scaled_attributes ? ctx.F32[4]
+ : ctx.TypeVector(ctx.TypeInt(32, true), 4);
+ case AttributeType::UnsignedScaled:
+ return ctx.profile.support_scaled_attributes ? ctx.F32[4] : ctx.U32[4];
case AttributeType::Disabled:
break;
}
throw InvalidArgument("Invalid attribute type {}", type);
}
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
-    const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) {
switch (type) {
case AttributeType::Float:
- return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+ return InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None};
case AttributeType::UnsignedInt:
- return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+ return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast};
case AttributeType::SignedInt:
- return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+ return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+ InputGenericLoadOp::Bitcast};
+ case AttributeType::SignedScaled:
+ return ctx.profile.support_scaled_attributes
+ ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+ : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+ InputGenericLoadOp::SToF};
+ case AttributeType::UnsignedScaled:
+ return ctx.profile.support_scaled_attributes
+ ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+ : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF};
case AttributeType::Disabled:
- return std::nullopt;
+ return InputGenericInfo{};
}
throw InvalidArgument("Invalid attribute type {}", type);
}
@@ -746,18 +754,29 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
continue;
}
AddLabel(labels[label_index]);
- const auto type{AttrTypes(*this, static_cast<u32>(index))};
- if (!type) {
+ const auto& generic{input_generics.at(index)};
+ const Id generic_id{generic.id};
+ if (!ValidId(generic_id)) {
OpReturnValue(Const(0.0f));
++label_index;
continue;
}
- const Id generic_id{input_generics.at(index)};
- const Id pointer{is_array
- ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
- : OpAccessChain(type->pointer, generic_id, masked_index)};
- const Id value{OpLoad(type->id, pointer)};
- const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+ const Id pointer{
+ is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index)
+ : OpAccessChain(generic.pointer_type, generic_id, masked_index)};
+ const Id value{OpLoad(generic.component_type, pointer)};
+ const Id result{[this, generic, value]() {
+ switch (generic.load_op) {
+ case InputGenericLoadOp::Bitcast:
+ return OpBitcast(F32[1], value);
+ case InputGenericLoadOp::SToF:
+ return OpConvertSToF(F32[1], value);
+ case InputGenericLoadOp::UToF:
+ return OpConvertUToF(F32[1], value);
+ default:
+ return value;
+            }
+ }()};
OpReturnValue(result);
++label_index;
}
@@ -1457,7 +1476,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
const Id id{DefineInput(*this, type, true)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
Name(id, fmt::format("in_attr{}", index));
- input_generics[index] = id;
+ input_generics[index] = GetAttributeInfo(*this, input_type, id);
if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
Decorate(id, spv::Decoration::PassthroughNV);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 768a4fbb5..e63330f11 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -95,6 +95,20 @@ struct StorageDefinitions {
Id U32x4{};
};
+enum class InputGenericLoadOp {
+ None,
+ Bitcast,
+ SToF,
+ UToF,
+};
+
+struct InputGenericInfo {
+ Id id;
+ Id pointer_type;
+ Id component_type;
+ InputGenericLoadOp load_op;
+};
+
struct GenericElementInfo {
Id id{};
u32 first_element{};
@@ -283,7 +297,7 @@ public:
bool need_input_position_indirect{};
Id input_position{};
- std::array<Id, 32> input_generics{};
+ std::array<InputGenericInfo, 32> input_generics{};
Id output_point_size{};
Id output_position{};
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 17a6d4888..928b35561 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -280,12 +280,18 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite
+ if (!host_info.support_float64) {
+ Optimization::LowerFp64ToFp32(program);
+ }
if (!host_info.support_float16) {
Optimization::LowerFp16ToFp32(program);
}
if (!host_info.support_int64) {
Optimization::LowerInt64ToInt32(program);
}
+ if (!host_info.support_conditional_barrier) {
+ Optimization::ConditionalBarrierPass(program);
+ }
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(env, program);
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 2aaa6c5ea..7d2ded907 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -10,6 +10,7 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
+ bool support_float64{}; ///< True when the device supports 64-bit floats
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
@@ -17,6 +18,8 @@ struct HostTranslateInfo {
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
+ bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
+ ///< control flow
};
} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
new file mode 100644
index 000000000..c3ed27f4f
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void ConditionalBarrierPass(IR::Program& program) {
+ s32 conditional_control_flow_count{0};
+ s32 conditional_return_count{0};
+ for (IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::If:
+ case IR::AbstractSyntaxNode::Type::Loop:
+ conditional_control_flow_count++;
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ conditional_control_flow_count--;
+ break;
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ case IR::AbstractSyntaxNode::Type::Return:
+ if (conditional_control_flow_count > 0) {
+ conditional_return_count++;
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ if ((conditional_control_flow_count > 0 || conditional_return_count > 0) &&
+ inst.GetOpcode() == IR::Opcode::Barrier) {
+ LOG_WARNING(Shader, "Barrier within conditional control flow");
+ inst.ReplaceOpcode(IR::Opcode::Identity);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ ASSERT(conditional_control_flow_count == 0);
+}
+
+} // namespace Shader::Optimization
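The pass makes a single walk over the abstract syntax list, counting open If/Loop scopes; a barrier encountered at positive depth, or after any return that happened inside such a scope, is demoted to a no-op. A simplified standalone model of the bookkeeping (reduced node set and hypothetical names; not the pass itself):

    #include <cassert>
    #include <vector>

    enum class Node { If, Loop, EndIf, Repeat, Return, Barrier, Other };

    // Returns one flag per barrier: true if that barrier would be demoted.
    std::vector<bool> DemotedBarriers(const std::vector<Node>& nodes) {
        std::vector<bool> demoted;
        int depth = 0;               // currently open If/Loop scopes
        int conditional_returns = 0; // returns seen while inside a scope
        for (const Node node : nodes) {
            switch (node) {
            case Node::If:
            case Node::Loop:
                ++depth;
                break;
            case Node::EndIf:
            case Node::Repeat:
                --depth;
                break;
            case Node::Return:
                if (depth > 0) {
                    ++conditional_returns;
                }
                break;
            case Node::Barrier:
                demoted.push_back(depth > 0 || conditional_returns > 0);
                break;
            default:
                break;
            }
        }
        assert(depth == 0); // scopes must balance, mirroring the pass's ASSERT
        return demoted;
    }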
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
new file mode 100644
index 000000000..5db7a38ad
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+
+constexpr s32 F64ToF32Exp = +127 - 1023;
+constexpr s32 F32ToF64Exp = +1023 - 127;
+
+IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
+ const IR::U32 lo{ir.CompositeExtract(packed, 0)};
+ const IR::U32 hi{ir.CompositeExtract(packed, 1)};
+ const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))};
+ const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))};
+ const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))};
+ const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))};
+ const IR::U32 mantissa{
+ ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)};
+ const IR::U32 exp_if_subnorm{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))};
+ const IR::U32 exp_if_infnan{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)};
+ const IR::U32 result{
+ ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+ ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))};
+ return ir.BitCast<IR::F32>(result);
+}
+
+IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
+ const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))};
+ const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))};
+ const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))};
+ const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))};
+ const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))};
+ const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))};
+ const IR::U32 exp_if_subnorm{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))};
+ const IR::U32 exp_if_infnan{
+ ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)};
+ const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))};
+ const IR::U32 hi{
+ ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
+ ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))};
+ return ir.CompositeConstruct(lo, hi);
+}
+
+IR::Opcode Replace(IR::Opcode op) {
+ switch (op) {
+ case IR::Opcode::FPAbs64:
+ return IR::Opcode::FPAbs32;
+ case IR::Opcode::FPAdd64:
+ return IR::Opcode::FPAdd32;
+ case IR::Opcode::FPCeil64:
+ return IR::Opcode::FPCeil32;
+ case IR::Opcode::FPFloor64:
+ return IR::Opcode::FPFloor32;
+ case IR::Opcode::FPFma64:
+ return IR::Opcode::FPFma32;
+ case IR::Opcode::FPMul64:
+ return IR::Opcode::FPMul32;
+ case IR::Opcode::FPNeg64:
+ return IR::Opcode::FPNeg32;
+ case IR::Opcode::FPRoundEven64:
+ return IR::Opcode::FPRoundEven32;
+ case IR::Opcode::FPSaturate64:
+ return IR::Opcode::FPSaturate32;
+ case IR::Opcode::FPClamp64:
+ return IR::Opcode::FPClamp32;
+ case IR::Opcode::FPTrunc64:
+ return IR::Opcode::FPTrunc32;
+ case IR::Opcode::CompositeConstructF64x2:
+ return IR::Opcode::CompositeConstructF32x2;
+ case IR::Opcode::CompositeConstructF64x3:
+ return IR::Opcode::CompositeConstructF32x3;
+ case IR::Opcode::CompositeConstructF64x4:
+ return IR::Opcode::CompositeConstructF32x4;
+ case IR::Opcode::CompositeExtractF64x2:
+ return IR::Opcode::CompositeExtractF32x2;
+ case IR::Opcode::CompositeExtractF64x3:
+ return IR::Opcode::CompositeExtractF32x3;
+ case IR::Opcode::CompositeExtractF64x4:
+ return IR::Opcode::CompositeExtractF32x4;
+ case IR::Opcode::CompositeInsertF64x2:
+ return IR::Opcode::CompositeInsertF32x2;
+ case IR::Opcode::CompositeInsertF64x3:
+ return IR::Opcode::CompositeInsertF32x3;
+ case IR::Opcode::CompositeInsertF64x4:
+ return IR::Opcode::CompositeInsertF32x4;
+ case IR::Opcode::FPOrdEqual64:
+ return IR::Opcode::FPOrdEqual32;
+ case IR::Opcode::FPUnordEqual64:
+ return IR::Opcode::FPUnordEqual32;
+ case IR::Opcode::FPOrdNotEqual64:
+ return IR::Opcode::FPOrdNotEqual32;
+ case IR::Opcode::FPUnordNotEqual64:
+ return IR::Opcode::FPUnordNotEqual32;
+ case IR::Opcode::FPOrdLessThan64:
+ return IR::Opcode::FPOrdLessThan32;
+ case IR::Opcode::FPUnordLessThan64:
+ return IR::Opcode::FPUnordLessThan32;
+ case IR::Opcode::FPOrdGreaterThan64:
+ return IR::Opcode::FPOrdGreaterThan32;
+ case IR::Opcode::FPUnordGreaterThan64:
+ return IR::Opcode::FPUnordGreaterThan32;
+ case IR::Opcode::FPOrdLessThanEqual64:
+ return IR::Opcode::FPOrdLessThanEqual32;
+ case IR::Opcode::FPUnordLessThanEqual64:
+ return IR::Opcode::FPUnordLessThanEqual32;
+ case IR::Opcode::FPOrdGreaterThanEqual64:
+ return IR::Opcode::FPOrdGreaterThanEqual32;
+ case IR::Opcode::FPUnordGreaterThanEqual64:
+ return IR::Opcode::FPUnordGreaterThanEqual32;
+ case IR::Opcode::FPIsNan64:
+ return IR::Opcode::FPIsNan32;
+ case IR::Opcode::ConvertS16F64:
+ return IR::Opcode::ConvertS16F32;
+ case IR::Opcode::ConvertS32F64:
+ return IR::Opcode::ConvertS32F32;
+ case IR::Opcode::ConvertS64F64:
+ return IR::Opcode::ConvertS64F32;
+ case IR::Opcode::ConvertU16F64:
+ return IR::Opcode::ConvertU16F32;
+ case IR::Opcode::ConvertU32F64:
+ return IR::Opcode::ConvertU32F32;
+ case IR::Opcode::ConvertU64F64:
+ return IR::Opcode::ConvertU64F32;
+ case IR::Opcode::ConvertF32F64:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF64F32:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF64S8:
+ return IR::Opcode::ConvertF32S8;
+ case IR::Opcode::ConvertF64S16:
+ return IR::Opcode::ConvertF32S16;
+ case IR::Opcode::ConvertF64S32:
+ return IR::Opcode::ConvertF32S32;
+ case IR::Opcode::ConvertF64S64:
+ return IR::Opcode::ConvertF32S64;
+ case IR::Opcode::ConvertF64U8:
+ return IR::Opcode::ConvertF32U8;
+ case IR::Opcode::ConvertF64U16:
+ return IR::Opcode::ConvertF32U16;
+ case IR::Opcode::ConvertF64U32:
+ return IR::Opcode::ConvertF32U32;
+ case IR::Opcode::ConvertF64U64:
+ return IR::Opcode::ConvertF32U64;
+ default:
+ return op;
+ }
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::PackDouble2x32: {
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
+ break;
+ }
+ case IR::Opcode::UnpackDouble2x32: {
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
+ break;
+ }
+ default:
+ inst.ReplaceOpcode(Replace(inst.GetOpcode()));
+ break;
+ }
+}
+
+} // Anonymous namespace
+
+void LowerFp64ToFp32(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ Lower(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
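The core of the pass is the pair of bit-repacking helpers: a packed f64 keeps only the top 23 of its 52 mantissa bits and has its exponent rebiased from 1023 to 127 (and back for the reverse direction), with Inf/NaN and zero/subnormal exponents special-cased. A host-side model of the f64-to-f32 direction (illustrative sketch; like the shader version it truncates the mantissa rather than rounding and does not guard against exponent overflow or underflow beyond the special cases):

    #include <cstdint>

    // lo/hi are the two words produced by PackDouble2x32; the result is the
    // IEEE-754 binary32 bit pattern.
    uint32_t PackedF64ToF32Bits(uint32_t lo, uint32_t hi) {
        const uint32_t sign = (hi >> 31) & 1u;
        const uint32_t exp64 = (hi >> 20) & 0x7FFu;
        const uint32_t mantissa = ((hi & 0xFFFFFu) << 3) | (lo >> 29); // top 23 of 52 bits
        uint32_t exp32;
        if (exp64 == 0x7FF) {
            exp32 = 0xFF; // Inf/NaN maps to Inf/NaN
        } else if (exp64 == 0) {
            exp32 = 0; // zero/subnormal keeps a zero exponent
        } else {
            exp32 = exp64 - (1023u - 127u); // rebias, wrapping like the u32 IAdd
        }
        return (sign << 31) | (exp32 << 23) | mantissa;
    }

For example, 1.0 arrives as hi = 0x3FF00000, lo = 0: the rebias gives 1023 - 896 = 127 and the mantissa is zero, producing 0x3F800000, the binary32 encoding of 1.0f.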
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..629d18fa1 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,10 +13,12 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConditionalBarrierPass(IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void IdentityRemovalPass(IR::Program& program);
+void LowerFp64ToFp32(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
void RescalingPass(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9f88fb440..9ca97f6a4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
bool support_gl_variable_aoffi{};
bool support_gl_sparse_textures{};
bool support_gl_derivative_control{};
+ bool support_scaled_attributes{};
bool warp_size_potentially_larger_than_guest{};
@@ -77,6 +78,8 @@ struct Profile {
bool has_gl_bool_ref_bug{};
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
bool ignore_nan_fp_comparisons{};
+ /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
+ bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
u32 gl_max_compute_smem_size{};
};
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 549b81ef7..3b63c249f 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -17,6 +17,8 @@ enum class AttributeType : u8 {
Float,
SignedInt,
UnsignedInt,
+ SignedScaled,
+ UnsignedScaled,
Disabled,
};