summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp 17
-rw-r--r-- src/shader_recompiler/profile.h 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 1
3 files changed, 19 insertions, 1 deletion
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c5db19d09..77ff8c573 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) {
// Returns the 32-bit component of `value` selected by (GetThreadId(ctx) >> 5).
// NOTE(review): `value` is presumably a four-component u32 subgroup mask (only components
// 0..3 are read below) -- confirm against callers.
Id WarpExtract(EmitContext& ctx, Id value) {
const Id thread_id{GetThreadId(ctx)};
const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; // thread_id >> 5
- return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+ if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) { // workaround path: avoid OpVectorExtractDynamic
+ const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)), // cN_sel: component N if local_index == N, else 0
+ ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))};
+ const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))};
+ const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))};
+ const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)),
+ ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))};
+ const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)}; // at most one cN_sel is non-zero, so OR-ing merges them losslessly
+ const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)};
+ const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)};
+ return c0_or_c1_or_c2_or_c3; // evaluates to 0 when local_index is not in 0..3
+ } else {
+ return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); // default path: single dynamic extract
+ }
}
Id LoadMask(EmitContext& ctx, Id mask) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 3bb4a7e6f..9ca97f6a4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -78,6 +78,8 @@ struct Profile {
bool has_gl_bool_ref_bug{};
/// Ignores SPIR-V ordered vs unordered using GLSL semantics
bool ignore_nan_fp_comparisons{};
+ /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
+ bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
u32 gl_max_compute_smem_size{};
};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e39713761..e506a8b30 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -351,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
+ .has_broken_spirv_subgroup_mask_vector_extract_dynamic = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY
};
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),