summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorameerj <52414509+ameerj@users.noreply.github.com>2021-03-24 01:27:17 +0100
committerameerj <52414509+ameerj@users.noreply.github.com>2021-07-23 03:51:24 +0200
commit3d07cef009cf9e287744c7771c67166ef5761ce8 (patch)
tree78f4fea18d9facb72850b4c2fe115e96b7af8f26
parentvk_pipeline_cache: Fix ReleaseContents order (diff)
downloadyuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar.gz
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar.bz2
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar.lz
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar.xz
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.tar.zst
yuzu-3d07cef009cf9e287744c7771c67166ef5761ce8.zip
-rw-r--r--src/shader_recompiler/CMakeLists.txt2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h3
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp9
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp58
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp16
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h5
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc6
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp4
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp52
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp6
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/shader_recompiler/shader_info.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp1
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h6
18 files changed, 182 insertions, 6 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 23cb523a8..086bdf8d0 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC
backend/spirv/emit_spirv_memory.cpp
backend/spirv/emit_spirv_select.cpp
backend/spirv/emit_spirv_undefined.cpp
+ backend/spirv/emit_spirv_vote.cpp
environment.h
exception.h
file_environment.cpp
@@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/select_source_with_predicate.cpp
frontend/maxwell/translate/impl/texture_fetch.cpp
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+ frontend/maxwell/translate/impl/vote.cpp
frontend/maxwell/translate/translate.cpp
frontend/maxwell/translate/translate.h
ir_opt/collect_shader_info_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 4a4de3676..36f130781 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) {
if (info.uses_local_invocation_id) {
local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
}
+ if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) {
+ subgroup_local_invocation_id =
+ DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
+ }
if (info.loads_position) {
const bool is_fragment{stage != Stage::Fragment};
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 9b9e0d6b1..6e64360bf 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -82,6 +82,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
+ Id subgroup_local_invocation_id{};
Id instance_id{};
Id instance_index{};
Id base_instance{};
@@ -96,7 +97,7 @@ public:
std::array<Id, 32> output_generics{};
std::array<Id, 8> frag_color{};
- Id frag_depth {};
+ Id frag_depth{};
std::vector<Id> interfaces;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 93e851133..107403912 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
+ if (info.uses_subgroup_vote && profile.support_vote) {
+ ctx.AddExtension("SPV_KHR_shader_ballot");
+ ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
+ if (!profile.warp_size_potentially_larger_than_guest) {
+ // vote ops are only used when not taking the long path
+ ctx.AddExtension("SPV_KHR_subgroup_vote");
+ ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
+ }
+ }
// TODO: Track this usage
ctx.AddCapability(spv::Capability::ImageGatherExtended);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 960d022ff..ce23200f2 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id coords, Id dref, Id bias_lc, Id offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod_lc, Id offset);
+Id EmitVoteAll(EmitContext& ctx, Id pred);
+Id EmitVoteAny(EmitContext& ctx, Id pred);
+Id EmitVoteEqual(EmitContext& ctx, Id pred);
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp
new file mode 100644
index 000000000..a63677ef2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id LargeWarpBallot(EmitContext& ctx, Id ballot) {
+ const Id shift{ctx.Constant(ctx.U32[1], 5)};
+ const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index);
+}
+} // Anonymous namespace
+
+Id EmitVoteAll(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+ const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpIEqual(ctx.U1, lhs, active_mask);
+}
+
+Id EmitVoteAny(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+ const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
+}
+
+Id EmitVoteEqual(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+ const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
+ ctx.OpIEqual(ctx.U1, lhs, active_mask));
+}
+
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
+ const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
+ }
+ return LargeWarpBallot(ctx, ballot);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 432dd29a5..ff2970125 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor
return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
}
+U1 IREmitter::VoteAll(const U1& value) {
+ return Inst<U1>(Opcode::VoteAll, value);
+}
+
+U1 IREmitter::VoteAny(const U1& value) {
+ return Inst<U1>(Opcode::VoteAny, value);
+}
+
+U1 IREmitter::VoteEqual(const U1& value) {
+ return Inst<U1>(Opcode::VoteEqual, value);
+}
+
+U32 IREmitter::SubgroupBallot(const U1& value) {
+ return Inst<U32>(Opcode::SubgroupBallot, value);
+}
+
} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 346cef3ab..1708be3ef 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -234,6 +234,11 @@ public:
const Value& offset, const F32& lod_clamp,
TextureInstInfo info);
+ [[nodiscard]] U1 VoteAll(const U1& value);
+ [[nodiscard]] U1 VoteAny(const U1& value);
+ [[nodiscard]] U1 VoteEqual(const U1& value);
+ [[nodiscard]] U32 SubgroupBallot(const U1& value);
+
private:
IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index bdc07b9a7..fe888b8b2 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32,
OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+
+// Vote operations
+OPCODE(VoteAll, U1, U1, )
+OPCODE(VoteAny, U1, U1, )
+OPCODE(VoteEqual, U1, U1, )
+OPCODE(SubgroupBallot, U32, U1, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 27b12ff3c..c0e36a7e2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) {
ThrowNotImplemented(Opcode::VMNMX);
}
-void TranslatorVisitor::VOTE(u64) {
- ThrowNotImplemented(Opcode::VOTE);
-}
-
void TranslatorVisitor::VOTE_vtg(u64) {
ThrowNotImplemented(Opcode::VOTE_vtg);
}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..a88894a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,52 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VoteOp : u64 {
+ ALL,
+ ANY,
+ EQ,
+};
+
+[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
+ switch (vote_op) {
+ case VoteOp::ALL:
+ return ir.VoteAll(pred);
+ case VoteOp::ANY:
+ return ir.VoteAny(pred);
+ case VoteOp::EQ:
+ return ir.VoteEqual(pred);
+ default:
+ throw NotImplementedException("Invalid VOTE op {}", vote_op);
+ }
+}
+
+void Vote(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 3, IR::Pred> pred_a;
+ BitField<42, 1, u64> neg_pred_a;
+ BitField<45, 3, IR::Pred> pred_b;
+ BitField<48, 2, VoteOp> vote_op;
+ } const vote{insn};
+
+ const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
+ v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
+ v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VOTE(u64 insn) {
+ Vote(*this, insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index f44eac5d8..db5138e4d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
break;
}
+ case IR::Opcode::VoteAll:
+ case IR::Opcode::VoteAny:
+ case IR::Opcode::VoteEqual:
+ case IR::Opcode::SubgroupBallot:
+ info.uses_subgroup_vote = true;
+ break;
default:
break;
}
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 3181c79fb..b57cbc310 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -19,6 +19,8 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
+ bool support_vote{};
+ bool warp_size_potentially_larger_than_guest{};
// FClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index f97730b34..3d9f04d1a 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -80,6 +80,7 @@ struct Info {
bool uses_sampled_1d{};
bool uses_sparse_residency{};
bool uses_demote_to_helper_invocation{};
+ bool uses_subgroup_vote{};
IR::Type used_constant_buffer_types{};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 6684d37a6..8e544d745 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
descriptor_update_template = std::move(tuple.descriptor_update_template);
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = nullptr,
+ .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *spv_module,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 41fc9588f..bdbc8dd1e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+ .support_vote = true,
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
};
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f0e5b098c..009b74f12 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -737,6 +737,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
+ ext_subgroup_size_control = true;
}
} else {
is_warp_potentially_bigger = true;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 82bccc8f0..c268a4f8d 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -193,6 +193,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_subgroup_size_control.
+ bool IsExtSubgroupSizeControlSupported() const {
+ return ext_subgroup_size_control;
+ }
+
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
@@ -297,6 +302,7 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
+ bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.