From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 ++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 9 ++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 ++++++++++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 6 +++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/vote.cpp | 52 +++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 6 +++ src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 7 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + src/video_core/vulkan_common/vulkan_device.cpp | 1 + src/video_core/vulkan_common/vulkan_device.h | 6 +++ 18 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 23cb523a8..086bdf8d0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_vote.cpp environment.h exception.h file_environment.cpp @@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/texture_fetch.cpp 
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4a4de3676..36f130781 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } + if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? 
spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9b9e0d6b1..6e64360bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id subgroup_local_invocation_id{}; Id instance_id{}; Id instance_index{}; Id base_instance{}; @@ -96,7 +97,7 @@ public: std::array output_generics{}; std::array frag_color{}; - Id frag_depth {}; + Id frag_depth{}; std::vector interfaces; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 93e851133..107403912 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } + if (info.uses_subgroup_vote && profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 960d022ff..ce23200f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id 
EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp new file mode 100644 index 000000000..a63677ef2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +// Returns the 32-bit word of a uvec4 ballot that contains the calling invocation's own bit. +// Each component holds 32 invocations, so the component index is the invocation id >> 5. +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id local_index{ctx.OpShiftRightLogical(ctx.U32[1], thread_id, shift)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, 
ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 432dd29a5..ff2970125 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +U1 IREmitter::VoteAll(const U1& value) { + return Inst(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst(Opcode::SubgroupBallot, value); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h 
b/src/shader_recompiler/frontend/ir/ir_emitter.h index 346cef3ab..1708be3ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -234,6 +234,11 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index bdc07b9a7..fe888b8b2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32, OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +// Vote operations +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- 
/dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f44eac5d8..db5138e4d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + 
info.uses_subgroup_vote = true; + break; default: break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3181c79fb..b57cbc310 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -19,6 +19,8 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_vote{}; + bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f97730b34..3d9f04d1a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -80,6 +80,7 @@ struct Info { bool uses_sampled_1d{}; bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, 
+ .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 41fc9588f..bdbc8dd1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_vote = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0e5b098c..009b74f12 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -737,6 +737,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 82bccc8f0..c268a4f8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -193,6 +193,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. 
+ bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -297,6 +302,7 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. -- cgit v1.2.3