// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include #include #include #include #include #include #include #include #include #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" namespace VideoCommon::Shader { enum class OperationCode { Assign, /// (float& dest, float src) -> void Select, /// (MetaArithmetic, bool pred, float a, float b) -> float FAdd, /// (MetaArithmetic, float a, float b) -> float FMul, /// (MetaArithmetic, float a, float b) -> float FDiv, /// (MetaArithmetic, float a, float b) -> float FFma, /// (MetaArithmetic, float a, float b, float c) -> float FNegate, /// (MetaArithmetic, float a) -> float FAbsolute, /// (MetaArithmetic, float a) -> float FClamp, /// (MetaArithmetic, float value, float min, float max) -> float FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float FMin, /// (MetaArithmetic, float a, float b) -> float FMax, /// (MetaArithmetic, float a, float b) -> float FCos, /// (MetaArithmetic, float a) -> float FSin, /// (MetaArithmetic, float a) -> float FExp2, /// (MetaArithmetic, float a) -> float FLog2, /// (MetaArithmetic, float a) -> float FInverseSqrt, /// (MetaArithmetic, float a) -> float FSqrt, /// (MetaArithmetic, float a) -> float FRoundEven, /// (MetaArithmetic, float a) -> float FFloor, /// (MetaArithmetic, float a) -> float FCeil, /// (MetaArithmetic, float a) -> float FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int IDiv, /// (MetaArithmetic, int a, int b) -> int INegate, /// (MetaArithmetic, int a) -> int IAbsolute, /// (MetaArithmetic, int a) -> int IMin, /// (MetaArithmetic, int a, int b) -> int IMax, /// (MetaArithmetic, int a, int b) -> int ICastFloat, /// (MetaArithmetic, float a) -> int ICastUnsigned, /// (MetaArithmetic, uint a) -> int ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int IBitwiseNot, /// (MetaArithmetic, int a) -> int IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int IBitCount, /// (MetaArithmetic, int) -> int IBitMSB, /// (MetaArithmetic, int) -> int UAdd, /// (MetaArithmetic, uint a, uint b) -> uint UMul, /// (MetaArithmetic, uint a, uint b) -> uint UDiv, /// (MetaArithmetic, uint a, uint b) -> uint UMin, /// (MetaArithmetic, uint a, uint b) -> uint UMax, /// (MetaArithmetic, uint a, uint b) -> uint UCastFloat, /// (MetaArithmetic, float a) -> uint UCastSigned, /// (MetaArithmetic, int a) -> uint ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint UBitwiseNot, /// (MetaArithmetic, uint a) -> uint UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint UBitMSB, /// (MetaArithmetic, uint) -> uint HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 HAbsolute, /// (f16vec2 a) -> f16vec2 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 HMergeF32, /// (f16vec2 src) -> float HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 HPack2, /// (float a, float b) -> f16vec2 LogicalAssign, /// (bool& dst, bool src) -> void LogicalAnd, /// (bool a, bool b) -> bool LogicalOr, /// (bool a, bool b) -> bool LogicalXor, /// (bool a, bool b) -> bool LogicalNegate, /// (bool a) -> bool LogicalPick2, /// (bool2 pair, uint index) -> bool LogicalAnd2, /// (bool2 a) -> bool LogicalFOrdLessThan, /// (float a, float b) -> bool LogicalFOrdEqual, /// (float a, float b) -> bool LogicalFOrdLessEqual, /// (float a, float b) -> bool LogicalFOrdGreaterThan, /// (float a, float b) -> bool LogicalFOrdNotEqual, /// (float a, float b) -> bool LogicalFOrdGreaterEqual, /// (float a, float b) -> bool LogicalFOrdered, /// (float a, float b) -> bool LogicalFUnordered, /// (float a, float b) -> bool LogicalFUnordLessThan, /// (float a, float b) -> bool LogicalFUnordEqual, /// (float a, float b) -> bool LogicalFUnordLessEqual, /// (float a, float b) -> bool LogicalFUnordGreaterThan, /// (float a, float b) -> bool LogicalFUnordNotEqual, /// (float a, float b) -> bool LogicalFUnordGreaterEqual, /// (float a, float b) -> bool LogicalILessThan, /// (int a, int b) -> bool LogicalIEqual, /// (int a, int b) -> bool LogicalILessEqual, /// (int a, int b) -> bool LogicalIGreaterThan, /// (int a, int b) -> bool LogicalINotEqual, /// (int a, int b) -> bool LogicalIGreaterEqual, /// (int a, int b) -> bool LogicalULessThan, /// (uint a, uint b) -> bool LogicalUEqual, /// (uint a, uint b) -> bool LogicalULessEqual, /// (uint a, uint b) -> bool LogicalUGreaterThan, /// (uint a, uint b) -> bool LogicalUNotEqual, /// (uint a, uint b) -> bool LogicalUGreaterEqual, /// (uint a, uint b) -> bool LogicalAddCarry, /// (uint a, uint b) -> bool Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Texture, /// (MetaTexture, float[N] coords) -> float4 TextureLod, /// (MetaTexture, float[N] coords) -> float4 TextureGather, /// (MetaTexture, float[N] coords) -> float4 TextureQueryDimensions, /// (MetaTexture, float a) -> float4 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 ImageLoad, /// (MetaImage, int[N] coords) -> void ImageStore, /// (MetaImage, int[N] coords) -> void AtomicImageAdd, /// (MetaImage, int[N] coords) -> void AtomicImageAnd, /// (MetaImage, int[N] coords) -> void AtomicImageOr, /// (MetaImage, int[N] coords) -> void AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void AtomicUExchange, /// (memory, uint) -> uint AtomicUAdd, /// (memory, uint) -> uint AtomicUMin, /// (memory, uint) -> uint AtomicUMax, /// (memory, uint) -> uint AtomicUAnd, /// (memory, uint) -> uint AtomicUOr, /// (memory, uint) -> uint AtomicUXor, /// (memory, uint) -> uint AtomicIExchange, /// (memory, int) -> int AtomicIAdd, /// (memory, int) -> int AtomicIMin, /// (memory, int) -> int AtomicIMax, /// (memory, int) -> int AtomicIAnd, /// (memory, int) -> int AtomicIOr, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int ReduceUAdd, /// (memory, uint) -> void ReduceUMin, /// (memory, uint) -> void ReduceUMax, /// (memory, uint) -> void ReduceUAnd, /// (memory, uint) -> void ReduceUOr, /// (memory, uint) -> void ReduceUXor, /// (memory, uint) -> void ReduceIAdd, /// (memory, int) -> void ReduceIMin, /// (memory, int) -> void ReduceIMax, /// (memory, int) -> void ReduceIAnd, /// (memory, int) -> void ReduceIOr, /// (memory, int) -> void ReduceIXor, /// (memory, int) -> void Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void PopFlowStack, /// () -> void Exit, /// () -> void Discard, /// () -> void EmitVertex, /// () -> void EndPrimitive, /// () -> void InvocationId, /// () -> int YNegate, /// () -> float LocalInvocationIdX, /// () -> uint LocalInvocationIdY, /// () -> uint LocalInvocationIdZ, /// () -> uint WorkGroupIdX, /// () -> uint WorkGroupIdY, /// () -> uint WorkGroupIdZ, /// () -> uint BallotThread, /// (bool) -> uint VoteAll, /// (bool) -> bool VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool ThreadId, /// () -> uint ThreadEqMask, /// () -> uint ThreadGeMask, /// () -> uint ThreadGtMask, /// () -> uint ThreadLeMask, /// () -> uint ThreadLtMask, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint Barrier, /// () -> void MemoryBarrierGroup, /// () -> void MemoryBarrierGlobal, /// () -> void Amount, }; enum class InternalFlag { Zero = 0, Sign = 1, Carry = 2, Overflow = 3, Amount = 4, }; enum class MetaStackClass { Ssy, Pbk, }; class OperationNode; class ConditionalNode; class GprNode; class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; class AbufNode; class CbufNode; class LmemNode; class PatchNode; class SmemNode; class GmemNode; class CommentNode; using NodeData = std::variant; using Node = std::shared_ptr; using Node4 = std::array; using NodeBlock = std::vector; struct ArraySamplerNode; struct BindlessSamplerNode; struct SeparateSamplerNode; using TrackSamplerData = std::variant; using TrackSampler = std::shared_ptr; struct SamplerEntry { /// Bound samplers constructor explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_indexed{is_indexed_} {} /// Separate sampler constructor explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, bool is_buffer_) : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} /// Bindless samplers constructor explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { } u32 index = 0; ///< Emulated index given for the this sampler. u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. u32 secondary_offset = 0; ///< Secondary offset in the const buffer. u32 buffer = 0; ///< Buffer where the bindless sampler is read. u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. u32 size = 1; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. bool is_separated = false; ///< Whether the image and sampler is separated or not. }; /// Represents a tracked bindless sampler into a direct const buffer struct ArraySamplerNode { u32 index; u32 base_offset; u32 bindless_var; }; /// Represents a tracked separate sampler image pair that was folded statically struct SeparateSamplerNode { std::pair indices; std::pair offsets; }; /// Represents a tracked bindless sampler into a direct const buffer struct BindlessSamplerNode { u32 index; u32 offset; }; struct ImageEntry { public: /// Bound images constructor explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, type{type_} {} /// Bindless samplers constructor explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} void MarkWrite() { is_written = true; } void MarkRead() { is_read = true; } void MarkAtomic() { MarkWrite(); MarkRead(); is_atomic = true; } u32 index = 0; u32 offset = 0; u32 buffer = 0; Tegra::Shader::ImageType type{}; bool is_bindless = false; bool is_written = false; bool is_read = false; bool is_atomic = false; }; struct GlobalMemoryBase { u32 cbuf_index = 0; u32 cbuf_offset = 0; [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); } }; /// Parameters describing an arithmetic operation struct MetaArithmetic { bool precise{}; ///< Whether the operation can be constraint or not }; /// Parameters describing a texture sampler struct MetaTexture { SamplerEntry sampler; Node array; Node depth_compare; std::vector aoffi; std::vector ptp; std::vector derivates; Node bias; Node lod; Node component; u32 element{}; Node index; }; struct MetaImage { const ImageEntry& image; std::vector values; u32 element{}; }; /// Parameters that modify an operation but are not part of any particular operand using Meta = std::variant; class AmendNode { public: [[nodiscard]] std::optional GetAmendIndex() const { if (amend_index == amend_null_index) { return std::nullopt; } return {amend_index}; } void SetAmendIndex(std::size_t index) { amend_index = index; } void ClearAmend() { amend_index = amend_null_index; } private: static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; std::size_t amend_index{amend_null_index}; }; /// Holds any kind of operation that can be done in the IR class OperationNode final : public AmendNode { public: explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} explicit OperationNode(OperationCode code_, Meta meta_) : OperationNode(code_, std::move(meta_), std::vector{}) {} explicit OperationNode(OperationCode code_, std::vector operands_) : OperationNode(code_, Meta{}, std::move(operands_)) {} explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} template explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} [[nodiscard]] OperationCode GetCode() const { return code; } [[nodiscard]] const Meta& GetMeta() const { return meta; } [[nodiscard]] std::size_t GetOperandsCount() const { return operands.size(); } [[nodiscard]] const Node& operator[](std::size_t operand_index) const { return operands.at(operand_index); } private: OperationCode code{}; Meta meta{}; std::vector operands; }; /// Encloses inside any kind of node that returns a boolean conditionally-executed code class ConditionalNode final : public AmendNode { public: explicit ConditionalNode(Node condition_, std::vector&& code_) : condition{std::move(condition_)}, code{std::move(code_)} {} [[nodiscard]] const Node& GetCondition() const { return condition; } [[nodiscard]] const std::vector& GetCode() const { return code; } private: Node condition; ///< Condition to be satisfied std::vector code; ///< Code to execute }; /// A general purpose register class GprNode final { public: explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} [[nodiscard]] constexpr u32 GetIndex() const { return static_cast(index); } private: Tegra::Shader::Register index{}; }; /// A custom variable class CustomVarNode final { public: explicit constexpr CustomVarNode(u32 index_) : index{index_} {} [[nodiscard]] constexpr u32 GetIndex() const { return index; } private: u32 index{}; }; /// A 32-bits value that represents an immediate value class ImmediateNode final { public: explicit constexpr ImmediateNode(u32 value_) : value{value_} {} [[nodiscard]] constexpr u32 GetValue() const { return value; } private: u32 value{}; }; /// One of Maxwell's internal flags class InternalFlagNode final { public: explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} [[nodiscard]] constexpr InternalFlag GetFlag() const { return flag; } private: InternalFlag flag{}; }; /// A predicate register, it can be negated without additional nodes class PredicateNode final { public: explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) : index{index_}, negated{negated_} {} [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { return index; } [[nodiscard]] constexpr bool IsNegated() const { return negated; } private: Tegra::Shader::Pred index{}; bool negated{}; }; /// Attribute buffer memory (known as attributes or varyings in GLSL terms) class AbufNode final { public: // Initialize for standard attributes (index is explicit). explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) : buffer{std::move(buffer_)}, index{index_}, element{element_} {} // Initialize for physical attributes (index is a variable value). explicit AbufNode(Node physical_address_, Node buffer_ = {}) : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { return index; } [[nodiscard]] u32 GetElement() const { return element; } [[nodiscard]] const Node& GetBuffer() const { return buffer; } [[nodiscard]] bool IsPhysicalBuffer() const { return static_cast(physical_address); } [[nodiscard]] const Node& GetPhysicalAddress() const { return physical_address; } private: Node physical_address; Node buffer; Tegra::Shader::Attribute::Index index{}; u32 element{}; }; /// Patch memory (used to communicate tessellation stages). class PatchNode final { public: explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} [[nodiscard]] constexpr u32 GetOffset() const { return offset; } private: u32 offset{}; }; /// Constant buffer node, usually mapped to uniform buffers in GLSL class CbufNode final { public: explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} [[nodiscard]] u32 GetIndex() const { return index; } [[nodiscard]] const Node& GetOffset() const { return offset; } private: u32 index{}; Node offset; }; /// Local memory node class LmemNode final { public: explicit LmemNode(Node address_) : address{std::move(address_)} {} [[nodiscard]] const Node& GetAddress() const { return address; } private: Node address; }; /// Shared memory node class SmemNode final { public: explicit SmemNode(Node address_) : address{std::move(address_)} {} [[nodiscard]] const Node& GetAddress() const { return address; } private: Node address; }; /// Global memory node class GmemNode final { public: explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, descriptor{descriptor_} {} [[nodiscard]] const Node& GetRealAddress() const { return real_address; } [[nodiscard]] const Node& GetBaseAddress() const { return base_address; } [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { return descriptor; } private: Node real_address; Node base_address; GlobalMemoryBase descriptor; }; /// Commentary, can be dropped class CommentNode final { public: explicit CommentNode(std::string text_) : text{std::move(text_)} {} [[nodiscard]] const std::string& GetText() const { return text; } private: std::string text; }; } // namespace VideoCommon::Shader