diff options
Diffstat (limited to '')
4 files changed, 145 insertions, 8 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@ namespace Shader::Backend::SPIRV { namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; + enum class Operation { Increment, Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { return pointer_type; } } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, + size_t start_offset) { + for (size_t location = start_offset; location < used_locations.size(); ++location) { + if (!used_locations.test(location)) { + return location; + } + } + throw RuntimeError("Unable to get an unused location for legacy attribute"); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (input_type == AttributeType::Disabled) { continue; } + used_locations.set(index); const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { break; } } + size_t previous_unused_location = 0; + if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_fixed_fnc_textures[index] = id; + } + } if (stage == Stage::TessellationEval) { for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); + used_locations.set(index); + } + } + size_t previous_unused_location = 0; + if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); + output_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, location); + output_fixed_fnc_textures[index] = id; } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public: Id write_global_func_u32x4{}; Id input_position{}; + Id input_front_color{}; + std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; + Id output_front_color{}; + std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14c77f162..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } +bool IsFixedFncTexture(IR::Attribute attribute) { + return attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return static_cast<u32>(attribute) % 4u; +} + template <typename... Args> Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const u32 element{FixedFncTextureAttributeElement(attr)}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], + element_id); + } switch (attr) { case IR::Attribute::PointSize: return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const Id attr_id{ctx.input_fixed_fnc_textures[index]}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } switch (attr) { case IR::Attribute::PrimitiveId: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, + ctx.Const(element))); + } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -333,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), + ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 78b1e1ba7..cef52c56e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,8 +7,13 @@ namespace Shader::Backend::SPIRV { namespace { +Id GetThreadId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); +} + Id WarpExtract(EmitContext& ctx, Id value) { - const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } @@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { return ctx.OpSelect(ctx.U32[1], in_range, ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); } + +Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { + const Id thirty_two{ctx.Const(32u)}; + const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; + const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; + return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); +} } // Anonymous namespace Id EmitLaneId(EmitContext& ctx) { - const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id id{GetThreadId(ctx)}; if (!ctx.profile.warp_size_potentially_larger_than_guest) { return id; } @@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + const Id thirty_two{ctx.Const(32u)}; + const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; + const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; + const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; + index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); + clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); + } const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; @@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; @@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; @@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id thread_id{GetThreadId(ctx)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + clamp = GetUpperClamp(ctx, thread_id, clamp); + } const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |