diff options
Diffstat (limited to 'src/shader_recompiler')
7 files changed, 243 insertions, 12 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@ namespace Shader::Backend::SPIRV { namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; + enum class Operation { Increment, Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { return pointer_type; } } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, + size_t start_offset) { + for (size_t location = start_offset; location < used_locations.size(); ++location) { + if (!used_locations.test(location)) { + return location; + } + } + throw RuntimeError("Unable to get an unused location for legacy attribute"); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { if (input_type == AttributeType::Disabled) { continue; } + used_locations.set(index); const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { break; } } + size_t previous_unused_location = 0; + if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineInput(*this, F32[4], true)}; + Decorate(id, spv::Decoration::Location, location); + input_fixed_fnc_textures[index] = id; + } + } if (stage == Stage::TessellationEval) { for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } + std::bitset<IR::NUM_GENERICS> used_locations{}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); + used_locations.set(index); + } + } + size_t previous_unused_location = 0; + if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); + output_front_color = id; + } + for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { + if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + const size_t location = + FindNextUnusedLocation(used_locations, previous_unused_location); + previous_unused_location = location; + used_locations.set(location); + const Id id{DefineOutput(*this, F32[4], invocations)}; + Decorate(id, spv::Decoration::Location, location); + output_fixed_fnc_textures[index] = id; } } switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public: Id write_global_func_u32x4{}; Id input_position{}; + Id input_front_color{}; + std::array<Id, 10> input_fixed_fnc_textures{}; std::array<Id, 32> input_generics{}; Id output_point_size{}; Id output_position{}; + Id output_front_color{}; + std::array<Id, 10> output_fixed_fnc_textures{}; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14c77f162..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } } +bool IsFixedFncTexture(IR::Attribute attribute) { + return attribute >= IR::Attribute::FixedFncTexture0S && + attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { + if (!IsFixedFncTexture(attribute)) { + throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); + } + return static_cast<u32>(attribute) % 4u; +} + template <typename... Args> Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const u32 element{FixedFncTextureAttributeElement(attr)}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], + element_id); + } switch (attr) { case IR::Attribute::PointSize: return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } + if (IsFixedFncTexture(attr)) { + const u32 index{FixedFncTextureAttributeIndex(attr)}; + const Id attr_id{ctx.input_fixed_fnc_textures[index]}; + const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; + return ctx.OpLoad(ctx.F32[1], attr_ptr); + } switch (attr) { case IR::Attribute::PrimitiveId: return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionW: return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: { + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, + ctx.Const(element))); + } case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -333,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: - return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), + ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 +20,7 @@ #include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass { public: TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::AbstractSyntaxList& syntax_list_) + IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public: IR::Block& first_block{*syntax_list.front().data.block}; IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); + if (uses_demote_to_helper && host_info.needs_demote_reorder) { + DemoteCombinationPass(); + } } private: @@ -809,7 +813,14 @@ private: } case StatementType::Return: { ensure_block(); - IR::IREmitter{*current_block}.Epilogue(); + IR::Block* return_block{block_pool.Create(inst_pool)}; + IR::IREmitter{*return_block}.Epilogue(); + current_block->AddBranch(return_block); + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = return_block; + current_block = nullptr; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; @@ -824,6 +835,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; merge.data.block = demote_block; + uses_demote_to_helper = true; break; } case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private: return block_pool.Create(inst_pool); } + void DemoteCombinationPass() { + using Type = IR::AbstractSyntaxNode::Type; + std::vector<IR::Block*> demote_blocks; + std::vector<IR::U1> demote_conds; + u32 num_epilogues{}; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : syntax_list) { + if (node.type == Type::If) { + ++branch_depth; + } + if (node.type == Type::EndIf) { + --branch_depth; + } + if (node.type != Type::Block) { + continue; + } + if (branch_depth > 1) { + // Skip reordering nested demote branches. + continue; + } + for (const IR::Inst& inst : node.data.block->Instructions()) { + const IR::Opcode op{inst.GetOpcode()}; + if (op == IR::Opcode::DemoteToHelperInvocation) { + demote_blocks.push_back(node.data.block); + break; + } + if (op == IR::Opcode::Epilogue) { + ++num_epilogues; + } + } + } + if (demote_blocks.size() == 0) { + return; + } + if (num_epilogues > 1) { + LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); + return; + } + s64 last_iterator_offset{}; + auto& asl{syntax_list}; + for (const IR::Block* demote_block : demote_blocks) { + const auto start_it{asl.begin() + last_iterator_offset}; + auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::If && asn.data.if_node.body == demote_block; + })}; + if (asl_it == asl.end()) { + // Demote without a conditional branch. + // No need to proceed since all fragment instances will be demoted regardless. + return; + } + const IR::Block* const end_if = asl_it->data.if_node.merge; + demote_conds.push_back(asl_it->data.if_node.cond); + last_iterator_offset = std::distance(asl.begin(), asl_it); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::Block && asn.data.block == demote_block; + }); + + asl_it = asl.erase(asl_it); + asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { + return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; + }); + asl_it = asl.erase(asl_it); + } + const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { + if (asn.type != Type::Block) { + return false; + } + for (const auto& inst : asn.data.block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return true; + } + } + return false; + }}; + const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; + const auto return_block_it{(reverse_it + 1).base()}; + + IR::IREmitter ir{*(return_block_it - 1)->data.block}; + IR::U1 cond(IR::Value(false)); + for (const auto& demote_cond : demote_conds) { + cond = ir.LogicalOr(cond, demote_cond); + } + cond.Inst()->DestructiveAddUsage(1); + + IR::AbstractSyntaxNode demote_if_node{}; + demote_if_node.type = Type::If; + demote_if_node.data.if_node.cond = cond; + demote_if_node.data.if_node.body = demote_blocks[0]; + demote_if_node.data.if_node.merge = return_block_it->data.block; + + IR::AbstractSyntaxNode demote_node{}; + demote_node.type = Type::Block; + demote_node.data.block = demote_blocks[0]; + + IR::AbstractSyntaxNode demote_endif_node{}; + demote_endif_node.type = Type::EndIf; + demote_endif_node.data.end_if.merge = return_block_it->data.block; + + asl.insert(return_block_it, demote_endif_node); + asl.insert(return_block_it, demote_node); + asl.insert(return_block_it, demote_if_node); + } + ObjectPool<Statement>& stmt_pool; ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Block>& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; + bool uses_demote_to_helper{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private: } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, + const HostTranslateInfo& host_info) { ObjectPool<Statement> stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace Maxwell { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { - bool support_float16{}; ///< True when the device supports 16-bit floats - bool support_int64{}; ///< True when the device supports 64-bit integers + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers + bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered }; } // namespace Shader |