1 files changed, 75 insertions, 31 deletions
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d3..65f1e1de9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -109,11 +109,13 @@ enum class OperationCode {
     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
     UBitCount,        /// (MetaArithmetic, uint) -> uint
 
-    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+    HAdd,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HMul,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,      /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
     HAbsolute, /// (f16vec2 a) -> f16vec2
     HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
+    HClamp,    /// (f16vec2 src, float min, float max) -> f16vec2
+    HUnpack,   /// (Tegra::Shader::HalfType, T value) -> f16vec2
     HMergeF32, /// (f16vec2 src) -> float
     HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
     HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
@@ -150,12 +152,18 @@ enum class OperationCode {
     LogicalUNotEqual,     /// (uint a, uint b) -> bool
     LogicalUGreaterEqual, /// (uint a, uint b) -> bool
 
-    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessThan,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HEqual,               /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessEqual,           /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterThan,         /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HNotEqual,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessThanWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HEqualWithNan,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HLessEqualWithNan,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterThanWithNan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HNotEqualWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
+    Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 
     Texture,                /// (MetaTexture, float[N] coords) -> float4
     TextureLod,             /// (MetaTexture, float[N] coords) -> float4
@@ -196,9 +204,23 @@ enum class ExitMethod {
 
 class Sampler {
 public:
+    // Use this constructor for bounded Samplers
     explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                      bool is_array, bool is_shadow)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+          is_bindless{false} {}
+
+    // Use this constructor for bindless Samplers
+    explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+                     Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
+          is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
+
+    // Use this only for serialization/deserialization
+    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+                     bool is_array, bool is_shadow, bool is_bindless)
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+          is_bindless{is_bindless} {}
 
     std::size_t GetOffset() const {
         return offset;
@@ -220,9 +242,18 @@ public:
         return is_shadow;
     }
 
+    bool IsBindless() const {
+        return is_bindless;
+    }
+
+    std::pair<u32, u32> GetBindlessCBuf() const {
+        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+    }
+
     bool operator<(const Sampler& rhs) const {
-        return std::tie(offset, index, type, is_array, is_shadow) <
-               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
+        return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
+               std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
+                        rhs.is_bindless);
     }
 
 private:
@@ -231,8 +262,9 @@ private:
     std::size_t offset{};
     std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
-    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
-    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };
 
 class ConstBuffer {
@@ -276,15 +308,13 @@ struct GlobalMemoryBase {
     }
 };
 
-struct MetaArithmetic {
-    bool precise{};
+struct GlobalMemoryUsage {
+    bool is_read{};
+    bool is_written{};
 };
 
-struct MetaHalfArithmetic {
+struct MetaArithmetic {
     bool precise{};
-    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
-                                                    Tegra::Shader::HalfType::H0_H1,
-                                                    Tegra::Shader::HalfType::H0_H1};
 };
 
 struct MetaTexture {
@@ -298,11 +328,10 @@ struct MetaTexture {
     u32 element{};
 };
 
-constexpr MetaArithmetic PRECISE = {true};
-constexpr MetaArithmetic NO_PRECISE = {false};
-constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
+inline constexpr MetaArithmetic PRECISE = {true};
+inline constexpr MetaArithmetic NO_PRECISE = {false};
 
-using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
+using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
 
 /// Holds any kind of operation that can be done in the IR
 class OperationNode final {
@@ -578,8 +607,8 @@ public:
         return used_clip_distances;
     }
 
-    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
-        return used_global_memory_bases;
+    const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
+        return used_global_memory;
     }
 
     std::size_t GetLength() const {
@@ -706,10 +735,14 @@ private:
 
     /// Unpacks a half immediate from an instruction
     Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
+    /// Unpacks a binary value into a half float pair with a type format
+    Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
     /// Merges a half pair into another value
     Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
     /// Conditionally absolute/negated half float pair. Absolute is applied first
     Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
+    /// Conditionally saturates a half float pair
+    Node GetSaturatedHalfFloat(Node value, bool saturate = true);
 
     /// Returns a predicate comparing two floats
     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
@@ -717,8 +750,7 @@ private:
     Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
                                        Node op_a, Node op_b);
     /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
-    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
-                                    const MetaHalfArithmetic& meta, Node op_a, Node op_b);
+    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
 
     /// Returns a predicate combiner operation
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
@@ -730,6 +762,11 @@ private:
     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
 
+    // Accesses a texture sampler for a bindless texture.
+    const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
+                                      Tegra::Shader::TextureType type, bool is_array,
+                                      bool is_shadow);
+
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
 
@@ -743,7 +780,8 @@ private:
 
     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                     bool is_array, bool is_aoffi);
+                     bool is_array, bool is_aoffi,
+                     std::optional<Tegra::Shader::Register> bindless_reg);
 
     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -763,7 +801,8 @@ private:
 
     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
-                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
+                         std::optional<Tegra::Shader::Register> bindless_reg);
 
     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                          u64 byte_height);
@@ -781,6 +820,11 @@ private:
 
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
 
+    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                     Node addr_register,
+                                                                     u32 immediate_offset,
+                                                                     bool is_write);
+
     template <typename... T>
     Node Operation(OperationCode code, const T*... operands) {
         return StoreNode(OperationNode(code, operands...));
@@ -834,7 +878,7 @@ private:
     std::map<u32, ConstBuffer> used_cbufs;
     std::set<Sampler> used_samplers;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
-    std::set<GlobalMemoryBase> used_global_memory_bases;
+    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
 
     Tegra::Shader::Header header;
 };