diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/ast.h | 10 | ||||
-rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 17 | ||||
-rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 21 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 68 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic.cpp | 11 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/decode/bfi.cpp | 7 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 9 | ||||
-rw-r--r-- | src/video_core/shader/decode/shift.cpp | 113 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 114 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 89 | ||||
-rw-r--r-- | src/video_core/shader/node_helper.h | 6 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 9 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 16 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 106 |
15 files changed, 529 insertions, 69 deletions
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index a2f0044ba..cca13bcde 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -65,8 +65,8 @@ public: void DetachSegment(ASTNode start, ASTNode end); void Remove(ASTNode node); - ASTNode first{}; - ASTNode last{}; + ASTNode first; + ASTNode last; }; class ASTProgram { @@ -299,9 +299,9 @@ private: friend class ASTZipper; ASTData data; - ASTNode parent{}; - ASTNode next{}; - ASTNode previous{}; + ASTNode parent; + ASTNode next; + ASTNode previous; ASTZipper* manager{}; }; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle return value; } +std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes bindless_samplers.insert_or_assign({buffer, offset}, sampler); } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + bool ConstBufferLocker::IsConsistent() const { if (!engine) { return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@ #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" namespace VideoCommon::Shader { @@ -40,6 +41,8 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + std::optional<u32> ObtainBoundBuffer(); + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -49,6 +52,9 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Set the bound buffer for this locker. + void SetBoundBuffer(u32 buffer); + /// Checks keys and samplers against engine's current const buffers. Returns true if they are /// the same value, false otherwise; bool IsConsistent() const; @@ -71,12 +77,27 @@ public: return bindless_samplers; } + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { + if (engine) { + return &engine->AccessGuestDriverProfile(); + } + return nullptr; + } + private: const Tegra::Engines::ShaderType stage; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <cstring> +#include <limits> #include <set> #include <fmt/format.h> @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector<u32> bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + ++count; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } +} + +std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, + VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return std::nullopt; + } + const u32 base_offset = sampler_to_deduce.GetOffset(); + u32 max_offset{std::numeric_limits<u32>::max()}; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + if (sampler.GetOffset() > base_offset) { + max_offset = std::min(sampler.GetOffset(), max_offset); + } + } + if (max_offset == std::numeric_limits<u32>::max()) { + return std::nullopt; + } + return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} + } // Anonymous namespace class ASTDecoder { @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto gpu_driver = locker.AccessGuestDriverProfile(); + DeduceTextureHandlerSize(gpu_driver, used_samplers); + // Deduce Indexed Samplers + if (!uses_indexed_samplers) { + return; + } + for (auto& sampler : used_samplers) { + if (!sampler.IsIndexed()) { + continue; + } + if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { + sampler.SetSize(*size); + } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); + sampler.SetSize(1); + } + } +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index fcedd2af6..90240c765 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() -> Node { + Node op_b = [&] { if (instr.is_b_imm) { return GetImmediate19(instr); } else if (instr.is_b_gpr) { @@ -141,6 +141,15 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FCMP_R: { + UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); + Node op_c = GetRegister(instr.gpr39); + Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); + SetRegister( + bb, instr.gpr0, + Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); + break; + } case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 371fae127..e60875cc4 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod const Node one = Immediate(1); const Node two = Immediate(2); - Node value{}; + Node value; for (u32 i = 0; i < lop_iterations; ++i) { const Node shift_amount = Immediate(i); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 8be1119df..f992bbe2a 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { + const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { + case OpCode::Id::BFI_RC: + return {GetRegister(instr.gpr39), + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; case OpCode::Id::BFI_IMM_R: - return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; + return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: UNREACHABLE(); return {Immediate(0), Immediate(0)}; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 7321698b2..4944e9d69 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case OpCode::Id::MOV_SYS: { const Node value = [this, instr] { switch (instr.sys20) { + case SystemVariable::LaneId: + LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); - return Immediate(0u); + return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); @@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } @@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index d419e9c45..3b391d3e6 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -10,8 +10,80 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::ShfType; +using Tegra::Shader::ShfXmode; + +namespace { + +Node IsFull(Node shift) { + return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); +} + +Node Shift(OperationCode opcode, Node value, Node shift) { + Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); + Node shifted = Operation(opcode, move(value), shift); + return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); +} + +Node ClampShift(Node shift, s32 size = 32) { + shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); + return Operation(OperationCode::IMin, move(shift), Immediate(size)); +} + +Node WrapShift(Node shift, s32 size = 32) { + return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); +} + +Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These values are used when the shift value is less than 32 + Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); + Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); + } + + // And these when it's larger than or 32 + const bool is_signed = type == ShfType::S64; + const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(opcode, high, move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { + // These values are used when the shift value is less than 32 + Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); + Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); + Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); + + if (type == ShfType::Bits32) { + // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits + return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); + } + + // And these when it's larger than or 32 + Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); + Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); + + Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); + Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); + + Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); + return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); +} + +} // Anonymous namespace u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { } }(); - switch (opcode->get().GetId()) { + switch (const auto opid = opcode->get().GetId(); opid) { case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - if (instr.shr.wrap) { - op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f)); - } else { - op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0)); - op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31)); - } + op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - std::move(op_a), std::move(op_b)); + move(op_a), move(op_b)); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::SHL_C: case OpCode::Id::SHL_R: case OpCode::Id::SHL_IMM: { - const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); + Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetRegister(bb, instr.gpr0, move(value)); + break; + } + case OpCode::Id::SHF_RIGHT_R: + case OpCode::Id::SHF_RIGHT_IMM: + case OpCode::Id::SHF_LEFT_R: + case OpCode::Id::SHF_LEFT_IMM: { + UNIMPLEMENTED_IF(instr.generates_cc); + UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", + static_cast<int>(instr.shf.xmode.Value())); + + if (instr.is_b_imm) { + op_b = Immediate(static_cast<u32>(instr.shf.immediate)); + } + const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; + Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); + + Node negated_shift = Operation(OperationCode::INegate, shift); + Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); + + const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; + Node value = (is_right ? ShiftRight : ShiftLeft)( + move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); + + SetRegister(bb, instr.gpr0, move(value)); break; } default: diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..351c8c2f1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, + {}, {}, component, element, {}}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, + {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + info.is_buffer, false); } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, buffer, offset] = - TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_sampler != nullptr); - if (base_sampler == nullptr) { + const auto [base_node, tracked_sampler_info] = + TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); + ASSERT(base_node != nullptr); + if (base_node == nullptr) { return nullptr; } - const auto info = GetSamplerInfo(sampler_info, offset, buffer); + if (const auto bindless_sampler_info = + std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = bindless_sampler_info->GetIndex(); + const u32 offset = bindless_sampler_info->GetOffset(); + const auto info = GetSamplerInfo(sampler_info, offset, buffer); + + // If this sampler has already been used, return the existing mapping. + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); + return &*it; + } - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { - return entry.GetBuffer() == buffer && entry.GetOffset() == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && - it->IsShadow() == info.is_shadow); - return &*it; - } + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer, false); + } else if (const auto array_sampler_info = + std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); + const auto info = GetSamplerInfo(sampler_info, base_offset); + + // If this sampler has already been used, return the existing mapping. + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), + [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && + it->IsBuffer() == info.is_buffer && it->IsIndexed()); + return &*it; + } - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + uses_indexed_samplers = true; + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, + info.is_shadow, info.is_buffer, true); + } + return nullptr; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, "This method is not supported."); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const Sampler* sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) + : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, + lod, {}, element, index_var}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -596,7 +630,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -632,7 +666,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, + index_var}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 9af1f0228..a0a7b9111 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -212,6 +212,7 @@ enum class MetaStackClass { class OperationNode; class ConditionalNode; class GprNode; +class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; @@ -223,26 +224,32 @@ class SmemNode; class GmemNode; class CommentNode; -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, +using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSampler = std::shared_ptr<TrackSamplerData>; + class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer} {} + is_buffer{is_buffer}, is_indexed{is_indexed} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} constexpr u32 GetIndex() const { return index; @@ -276,16 +283,72 @@ public: return is_bindless; } + constexpr bool IsIndexed() const { + return is_indexed; + } + + constexpr u32 Size() const { + return size; + } + + constexpr void SetSize(u32 new_size) { + size = new_size; + } + private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + u32 size{}; ///< Size of the sampler if indexed. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: + explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) + : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetBaseOffset() const { + return base_offset; + } + + constexpr u32 GetIndexVar() const { + return bindless_var; + } + +private: + u32 index; + u32 base_offset; + u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: + explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { + return offset; + } + +private: + u32 index; + u32 offset; }; class Image final { @@ -380,8 +443,9 @@ struct MetaTexture { std::vector<Node> derivates; Node bias; Node lod; - Node component{}; + Node component; u32 element{}; + Node index; }; struct MetaImage { @@ -488,6 +552,19 @@ private: Tegra::Shader::Register index{}; }; +/// A custom variable +class CustomVarNode final { +public: + explicit constexpr CustomVarNode(u32 index) : index{index} {} + + constexpr u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); } +template <typename T, typename... Args> +TrackSampler MakeTrackSampler(Args&&... args) { + static_assert(std::is_convertible_v<T, TrackSamplerData>); + return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); +} + template <typename... Args> Node Operation(OperationCode code, Args&&... args) { if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); + PostDecode(); } ShaderIR::~ShaderIR() = default; @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { return MakeNode<GprNode>(reg); } +Node ShaderIR::GetCustomVariable(u32 id) { + return MakeNode<CustomVarNode>(id); +} + Node ShaderIR::GetImmediate19(Instruction instr) { return Immediate(instr.alu.GetImm20_19()); } @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { return id; } +u32 ShaderIR::NewCustomVariable() { + return num_custom_variables++; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public: return amend_code[index]; } + u32 GetNumCustomVariables() const { + return num_custom_variables; + } + private: friend class ASTDecoder; @@ -191,6 +195,7 @@ private: }; void Decode(); + void PostDecode(); NodeBlock DecodeRange(u32 begin, u32 end); void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); @@ -235,6 +240,8 @@ private: /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); + /// Generates a node for a custom variable + Node GetCustomVariable(u32 id); /// Generates a node representing a 19-bit immediate value Node GetImmediate19(Tegra::Shader::Instruction instr); /// Generates a node representing a 32-bit immediate value @@ -321,7 +328,7 @@ private: std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses an image. @@ -387,6 +394,9 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, @@ -399,6 +409,8 @@ private: /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); + u32 NewCustomVariable(); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -414,6 +426,7 @@ private: NodeBlock global_code; ASTManager program_manager{true, true}; std::vector<Node> amend_code; + u32 num_custom_variables{}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; @@ -431,6 +444,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_warps{}; + bool uses_indexed_samplers{}; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..face8c943 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } return {}; } + +std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { + if (operation.GetCode() != OperationCode::UAdd) { + return std::nullopt; + } + Node gpr; + Node offset; + ASSERT(operation.GetOperandsCount() == 2); + for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { + Node operand = operation[i]; + if (std::holds_alternative<ImmediateNode>(*operand)) { + offset = operation[i]; + } else if (std::holds_alternative<GprNode>(*operand)) { + gpr = operation[i]; + } + } + if (offset && gpr) { + return std::make_pair(gpr, offset); + } + return std::nullopt; +} + +bool AmendNodeCv(std::size_t amend_index, Node node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { + operation->SetAmendIndex(amend_index); + return true; + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + conditional->SetAmendIndex(amend_index); + return true; + } + return false; +} + } // Anonymous namespace +std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + // Constant buffer found, test if it's an immediate + const auto offset = cbuf->GetOffset(); + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + auto track = + MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + return {tracked, track}; + } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + auto bound_buffer = locker.ObtainBoundBuffer(); + if (!bound_buffer) { + return {}; + } + if (*bound_buffer != cbuf->GetIndex()) { + return {}; + } + auto pair = DecoupleIndirectRead(*operation); + if (!pair) { + return {}; + } + auto [gpr, base_offset] = *pair; + const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); + auto gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver == nullptr) { + return {}; + } + const u32 bindless_cv = NewCustomVariable(); + const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, + Immediate(gpu_driver->GetTextureHandlerSize())); + + const Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); + const std::size_t amend_index = DeclareAmend(amend_op); + AmendNodeCv(amend_index, code[cursor]); + // TODO Implement Bindless Index custom variable + auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), + offset_inm->GetValue(), bindless_cv); + return {tracked, track}; + } + return {}; + } + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { + if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { + return {}; + } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; + } + return TrackBindlessSampler(source, code, new_cursor); + } + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); + std::get<0>(found)) { + // Cbuf found in operand. + return found; + } + } + return {}; + } + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackBindlessSampler(tracked, conditional_code, + static_cast<s64>(conditional_code.size())); + } + return {}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |