summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/frontend/ir/ir_emitter.h
blob: 17bc32fc83795a69f2e2eacdafdc62a856e3fa2e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <cstring>
#include <type_traits>

#include "shader_recompiler/frontend/ir/attribute.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::IR {

class IREmitter {
public:
    explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
        : block{&block_}, insertion_point{insertion_point_} {}

    Block* block;

    [[nodiscard]] U1 Imm1(bool value) const;
    [[nodiscard]] U8 Imm8(u8 value) const;
    [[nodiscard]] U16 Imm16(u16 value) const;
    [[nodiscard]] U32 Imm32(u32 value) const;
    [[nodiscard]] U32 Imm32(s32 value) const;
    [[nodiscard]] F32 Imm32(f32 value) const;
    [[nodiscard]] U64 Imm64(u64 value) const;
    [[nodiscard]] U64 Imm64(s64 value) const;
    [[nodiscard]] F64 Imm64(f64 value) const;

    void Branch(Block* label);
    void BranchConditional(const U1& condition, Block* true_label, Block* false_label);
    void LoopMerge(Block* merge_block, Block* continue_target);
    void SelectionMerge(Block* merge_block);
    void Return();
    void Unreachable();
    void DemoteToHelperInvocation(Block* continue_label);

    void Prologue();
    void Epilogue();

    [[nodiscard]] U32 GetReg(IR::Reg reg);
    void SetReg(IR::Reg reg, const U32& value);

    [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
    void SetPred(IR::Pred pred, const U1& value);

    [[nodiscard]] U1 GetGotoVariable(u32 id);
    void SetGotoVariable(u32 id, const U1& value);

    [[nodiscard]] U32 GetIndirectBranchVariable();
    void SetIndirectBranchVariable(const U32& value);

    [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
    [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
                               bool is_signed);
    [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);

    [[nodiscard]] U1 GetZFlag();
    [[nodiscard]] U1 GetSFlag();
    [[nodiscard]] U1 GetCFlag();
    [[nodiscard]] U1 GetOFlag();

    void SetZFlag(const U1& value);
    void SetSFlag(const U1& value);
    void SetCFlag(const U1& value);
    void SetOFlag(const U1& value);

    [[nodiscard]] U1 Condition(IR::Condition cond);
    [[nodiscard]] U1 GetFlowTestResult(FlowTest test);

    [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
    void SetAttribute(IR::Attribute attribute, const F32& value);

    void SetFragColor(u32 index, u32 component, const F32& value);
    void SetFragDepth(const F32& value);

    [[nodiscard]] U32 WorkgroupIdX();
    [[nodiscard]] U32 WorkgroupIdY();
    [[nodiscard]] U32 WorkgroupIdZ();

    [[nodiscard]] U32 LocalInvocationIdX();
    [[nodiscard]] U32 LocalInvocationIdY();
    [[nodiscard]] U32 LocalInvocationIdZ();

    [[nodiscard]] U32 LoadGlobalU8(const U64& address);
    [[nodiscard]] U32 LoadGlobalS8(const U64& address);
    [[nodiscard]] U32 LoadGlobalU16(const U64& address);
    [[nodiscard]] U32 LoadGlobalS16(const U64& address);
    [[nodiscard]] U32 LoadGlobal32(const U64& address);
    [[nodiscard]] Value LoadGlobal64(const U64& address);
    [[nodiscard]] Value LoadGlobal128(const U64& address);

    void WriteGlobalU8(const U64& address, const U32& value);
    void WriteGlobalS8(const U64& address, const U32& value);
    void WriteGlobalU16(const U64& address, const U32& value);
    void WriteGlobalS16(const U64& address, const U32& value);
    void WriteGlobal32(const U64& address, const U32& value);
    void WriteGlobal64(const U64& address, const IR::Value& vector);
    void WriteGlobal128(const U64& address, const IR::Value& vector);

    [[nodiscard]] U32 LoadLocal(const U32& word_offset);
    void WriteLocal(const U32& word_offset, const U32& value);

    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
    void WriteShared(int bit_size, const U32& offset, const Value& value);

    [[nodiscard]] U1 GetZeroFromOp(const Value& op);
    [[nodiscard]] U1 GetSignFromOp(const Value& op);
    [[nodiscard]] U1 GetCarryFromOp(const Value& op);
    [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
    [[nodiscard]] U1 GetSparseFromOp(const Value& op);
    [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);

    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                           const Value& e4);
    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                               const Value& false_value);

    template <typename Dest, typename Source>
    [[nodiscard]] Dest BitCast(const Source& value);

    [[nodiscard]] U64 PackUint2x32(const Value& vector);
    [[nodiscard]] Value UnpackUint2x32(const U64& value);

    [[nodiscard]] U32 PackFloat2x16(const Value& vector);
    [[nodiscard]] Value UnpackFloat2x16(const U32& value);

    [[nodiscard]] U32 PackHalf2x16(const Value& vector);
    [[nodiscard]] Value UnpackHalf2x16(const U32& value);

    [[nodiscard]] F64 PackDouble2x32(const Value& vector);
    [[nodiscard]] Value UnpackDouble2x32(const F64& value);

    [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
    [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
    [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
                                  FpControl control = {});

    [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);

    [[nodiscard]] F32 FPCos(const F32& value);
    [[nodiscard]] F32 FPSin(const F32& value);
    [[nodiscard]] F32 FPExp2(const F32& value);
    [[nodiscard]] F32 FPLog2(const F32& value);
    [[nodiscard]] F32F64 FPRecip(const F32F64& value);
    [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
    [[nodiscard]] F32 FPSqrt(const F32& value);
    [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
                                    const F16F32F64& max_value);
    [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});

    [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                             bool ordered = true);
    [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                                bool ordered = true);
    [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
                                bool ordered = true);
    [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
                                   FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
                                     FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
                                        FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
    [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
    [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
    [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
    [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});

    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
    [[nodiscard]] U32U64 INeg(const U32U64& value);
    [[nodiscard]] U32U64 IAbs(const U32U64& value);
    [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
    [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
    [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
    [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                                     const U32& count);
    [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
                                      bool is_signed = false);
    [[nodiscard]] U32 BitReverse(const U32& value);
    [[nodiscard]] U32 BitCount(const U32& value);
    [[nodiscard]] U32 BitwiseNot(const U32& value);

    [[nodiscard]] U32 FindSMsb(const U32& value);
    [[nodiscard]] U32 FindUMsb(const U32& value);
    [[nodiscard]] U32 SMin(const U32& a, const U32& b);
    [[nodiscard]] U32 UMin(const U32& a, const U32& b);
    [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
    [[nodiscard]] U32 SMax(const U32& a, const U32& b);
    [[nodiscard]] U32 UMax(const U32& a, const U32& b);
    [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
    [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
    [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);

    [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
    [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);

    [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
    [[nodiscard]] U1 LogicalNot(const U1& value);

    [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
    [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize,
                                        const Value& value);
    [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize,
                                        const Value& value);
    [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
                                        const Value& value);

    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
                                      FpControl control = {});

    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
                                               const F32& bias, const Value& offset,
                                               const F32& lod_clamp, TextureInstInfo info);
    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
                                               const F32& lod, const Value& offset,
                                               const F32& lod_clamp, TextureInstInfo info);
    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
                                                 const F32& dref, const F32& bias,
                                                 const Value& offset, const F32& lod_clamp,
                                                 TextureInstInfo info);
    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
                                                 const F32& dref, const F32& lod,
                                                 const Value& offset, const F32& lod_clamp,
                                                 TextureInstInfo info);
    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);

    [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
                                    const Value& offset2, TextureInstInfo info);

    [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
                                        const Value& offset, const Value& offset2, const F32& dref,
                                        TextureInstInfo info);

    [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                   const U32& lod, const U32& multisampling, TextureInstInfo info);

    [[nodiscard]] U1 VoteAll(const U1& value);
    [[nodiscard]] U1 VoteAny(const U1& value);
    [[nodiscard]] U1 VoteEqual(const U1& value);
    [[nodiscard]] U32 SubgroupBallot(const U1& value);
    [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                   const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
                                  const IR::U32& seg_mask);
    [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
                                       const IR::U32& clamp, const IR::U32& seg_mask);

private:
    IR::Block::iterator insertion_point;

    template <typename T = Value, typename... Args>
    T Inst(Opcode op, Args... args) {
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
        return T{Value{&*it}};
    }

    template <typename T>
    requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
        Flags() = default;
        Flags(T proxy_) : proxy{proxy_} {}

        T proxy;
    };

    template <typename T = Value, typename FlagType, typename... Args>
    T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
        u32 raw_flags{};
        std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
        return T{Value{&*it}};
    }
};

} // namespace Shader::IR