summaryrefslogblamecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
blob: eb5158407d79e9d6aad6350e2c47d0a6dc60162c (plain) (tree)
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787


























































































































































































































































































                                                                                                    
                                                 





                                                                                          
                                                 






                                                                                             
                                                 






                                                                                                 
                                                 

























                                                               
                                                  


































                                                                                    
                                                                                           
                                                      
                                          



                                                                       
                                                 



















                                                                                                   
                                                 









                                                                             
                                                 

























































































































































































































































































































































































                                                                                                

                              












































































































































































































                                                                                                    








                                     

























































































































































                                                                                              
                                                         
                                              




                                                                             
                                              




                                                                            
                                              















                                                                                 
                                              











































                                                                                 
                                                 




                                                                    
                                                 






























































































                                                                                              
                                                 




                                                                                       
                                                 







                                                                                          
                                                          





                                                               
                                                          


















































































































































































                                                                                               
                                             








                                                                                         
                                             


                                     

                                                                    






















                                                                                                  
                                             





                                                               






                                                                                            
 







                                                                                         








































































































































































                                                                                     
                                             





                                                                                
                                             





                                                              
                                             








                                                                
                                             








                                                                 
                                             














































































































































































































































































                                                                                                    
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

#include <fmt/format.h>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"

// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
// GLASM lacks booleans, so they have to be implemented as integers.
// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
// select between two values, because -1 will be evaluated as true and 0 as false.

namespace OpenGL {

namespace {

using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using namespace VideoCommon::Shader;
using Operation = const OperationNode&;

constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};

char Swizzle(std::size_t component) {
    ASSERT(component < 4);
    return component["xyzw"];
}

constexpr bool IsGenericAttribute(Attribute::Index index) {
    return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}

u32 GetGenericAttributeIndex(Attribute::Index index) {
    ASSERT(IsGenericAttribute(index));
    return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
}

std::string_view Modifiers(Operation operation) {
    const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
    if (meta && meta->precise) {
        return ".PREC";
    }
    return "";
}

std::string_view GetInputFlags(PixelImap attribute) {
    switch (attribute) {
    case PixelImap::Perspective:
        return "";
    case PixelImap::Constant:
        return "FLAT ";
    case PixelImap::ScreenLinear:
        return "NOPERSPECTIVE ";
    case PixelImap::Unused:
        break;
    }
    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
    return {};
}

std::string_view ImageType(Tegra::Shader::ImageType image_type) {
    switch (image_type) {
    case Tegra::Shader::ImageType::Texture1D:
        return "1D";
    case Tegra::Shader::ImageType::TextureBuffer:
        return "BUFFER";
    case Tegra::Shader::ImageType::Texture1DArray:
        return "ARRAY1D";
    case Tegra::Shader::ImageType::Texture2D:
        return "2D";
    case Tegra::Shader::ImageType::Texture2DArray:
        return "ARRAY2D";
    case Tegra::Shader::ImageType::Texture3D:
        return "3D";
    }
    UNREACHABLE();
    return {};
}

std::string_view StackName(MetaStackClass stack) {
    switch (stack) {
    case MetaStackClass::Ssy:
        return "SSY";
    case MetaStackClass::Pbk:
        return "PBK";
    }
    UNREACHABLE();
    return "";
};

std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
    switch (topology) {
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
        return "POINTS";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
        return "LINES";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
        return "LINES_ADJACENCY";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
        return "TRIANGLES";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
        return "TRIANGLES_ADJACENCY";
    default:
        UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
        return "POINTS";
    }
}

std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
    switch (topology) {
    case Tegra::Shader::OutputTopology::PointList:
        return "POINTS";
    case Tegra::Shader::OutputTopology::LineStrip:
        return "LINE_STRIP";
    case Tegra::Shader::OutputTopology::TriangleStrip:
        return "TRIANGLE_STRIP";
    default:
        UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
        return "points";
    }
}

std::string_view StageInputName(ShaderType stage) {
    switch (stage) {
    case ShaderType::Vertex:
    case ShaderType::Geometry:
        return "vertex";
    case ShaderType::Fragment:
        return "fragment";
    case ShaderType::Compute:
        return "invocation";
    default:
        UNREACHABLE();
        return "";
    }
}

std::string TextureType(const MetaTexture& meta) {
    if (meta.sampler.is_buffer) {
        return "BUFFER";
    }
    std::string type;
    if (meta.sampler.is_shadow) {
        type += "SHADOW";
    }
    if (meta.sampler.is_array) {
        type += "ARRAY";
    }
    type += [&meta] {
        switch (meta.sampler.type) {
        case Tegra::Shader::TextureType::Texture1D:
            return "1D";
        case Tegra::Shader::TextureType::Texture2D:
            return "2D";
        case Tegra::Shader::TextureType::Texture3D:
            return "3D";
        case Tegra::Shader::TextureType::TextureCube:
            return "CUBE";
        }
        UNREACHABLE();
        return "2D";
    }();
    return type;
}

std::string GlobalMemoryName(const GlobalMemoryBase& base) {
    return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
}

class ARBDecompiler final {
public:
    explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                           ShaderType stage, std::string_view identifier);

    std::string Code() const {
        return shader_source;
    }

private:
    void DeclareHeader();
    void DeclareVertex();
    void DeclareGeometry();
    void DeclareFragment();
    void DeclareCompute();
    void DeclareInputAttributes();
    void DeclareOutputAttributes();
    void DeclareLocalMemory();
    void DeclareGlobalMemory();
    void DeclareConstantBuffers();
    void DeclareRegisters();
    void DeclareTemporaries();
    void DeclarePredicates();
    void DeclareInternalFlags();

    void InitializeVariables();

    void DecompileAST();
    void DecompileBranchMode();

    void VisitAST(const ASTNode& node);
    std::string VisitExpression(const Expr& node);

    void VisitBlock(const NodeBlock& bb);

    std::string Visit(const Node& node);

    std::pair<std::string, std::size_t> BuildCoords(Operation);
    std::string BuildAoffi(Operation);
    void Exit();

    std::string Assign(Operation);
    std::string Select(Operation);
    std::string FClamp(Operation);
    std::string FCastHalf0(Operation);
    std::string FCastHalf1(Operation);
    std::string FSqrt(Operation);
    std::string FSwizzleAdd(Operation);
    std::string HAdd2(Operation);
    std::string HMul2(Operation);
    std::string HFma2(Operation);
    std::string HAbsolute(Operation);
    std::string HNegate(Operation);
    std::string HClamp(Operation);
    std::string HCastFloat(Operation);
    std::string HUnpack(Operation);
    std::string HMergeF32(Operation);
    std::string HMergeH0(Operation);
    std::string HMergeH1(Operation);
    std::string HPack2(Operation);
    std::string LogicalAssign(Operation);
    std::string LogicalPick2(Operation);
    std::string LogicalAnd2(Operation);
    std::string FloatOrdered(Operation);
    std::string FloatUnordered(Operation);
    std::string LogicalAddCarry(Operation);
    std::string Texture(Operation);
    std::string TextureGather(Operation);
    std::string TextureQueryDimensions(Operation);
    std::string TextureQueryLod(Operation);
    std::string TexelFetch(Operation);
    std::string TextureGradient(Operation);
    std::string ImageLoad(Operation);
    std::string ImageStore(Operation);
    std::string Branch(Operation);
    std::string BranchIndirect(Operation);
    std::string PushFlowStack(Operation);
    std::string PopFlowStack(Operation);
    std::string Exit(Operation);
    std::string Discard(Operation);
    std::string EmitVertex(Operation);
    std::string EndPrimitive(Operation);
    std::string InvocationId(Operation);
    std::string YNegate(Operation);
    std::string ThreadId(Operation);
    std::string ShuffleIndexed(Operation);
    std::string Barrier(Operation);
    std::string MemoryBarrierGroup(Operation);
    std::string MemoryBarrierGlobal(Operation);

    template <const std::string_view& op>
    std::string Unary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
        return temporary;
    }

    template <const std::string_view& op>
    std::string Binary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]));
        return temporary;
    }

    template <const std::string_view& op>
    std::string Trinary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]), Visit(operation[2]));
        return temporary;
    }

    template <const std::string_view& op, bool unordered>
    std::string FloatComparison(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
        AddLine("MOV.S {}, 0;", temporary);
        AddLine("MOV.S {} (NE.x), -1;", temporary);

        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        if constexpr (unordered) {
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
        } else if (op == SNE_F) {
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
        }
        return temporary;
    }

    template <const std::string_view& op, bool is_nan>
    std::string HalfComparison(Operation operation) {
        std::string tmp1 = AllocVectorTemporary();
        const std::string tmp2 = AllocVectorTemporary();
        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        AddLine("UP2H.F {}, {};", tmp1, op_a);
        AddLine("UP2H.F {}, {};", tmp2, op_b);
        AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
        AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
        AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
        AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
        AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
        if constexpr (is_nan) {
            AddLine("MOVC.F RC.x, {};", op_a);
            AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
            AddLine("MOVC.F RC.x, {};", op_b);
            AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
        }
        return tmp1;
    }

    template <const std::string_view& op, const std::string_view& type>
    std::string AtomicImage(Operation operation) {
        const auto& meta = std::get<MetaImage>(operation.GetMeta());
        const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
        const std::size_t num_coords = operation.GetOperandsCount();
        const std::size_t num_values = meta.values.size();

        const std::string coord = AllocVectorTemporary();
        const std::string value = AllocVectorTemporary();
        for (std::size_t i = 0; i < num_coords; ++i) {
            AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
        }
        for (std::size_t i = 0; i < num_values; ++i) {
            AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
        }

        AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
                image_id, ImageType(meta.image.type));
        return fmt::format("{}.x", coord);
    }

    template <const std::string_view& op, const std::string_view& type>
    std::string Atomic(Operation operation) {
        std::string temporary = AllocTemporary();
        std::string address;
        std::string_view opname;
        if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
            AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
                    Visit(gmem->GetBaseAddress()));
            address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
            opname = "ATOMB";
        } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
            address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
            opname = "ATOMS";
        } else {
            UNREACHABLE();
            return "{0, 0, 0, 0}";
        }
        AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
        return temporary;
    }

    template <char type>
    std::string Negate(Operation operation) {
        std::string temporary = AllocTemporary();
        if constexpr (type == 'F') {
            AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
        } else {
            AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
        }
        return temporary;
    }

    template <char type>
    std::string Absolute(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
        return temporary;
    }

    template <char type>
    std::string BitfieldInsert(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
        AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
                Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    template <char type>
    std::string BitfieldExtract(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
        AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    template <char swizzle>
    std::string LocalInvocationId(Operation) {
        return fmt::format("invocation.localid.{}", swizzle);
    }

    template <char swizzle>
    std::string WorkGroupId(Operation) {
        return fmt::format("invocation.groupid.{}", swizzle);
    }

    template <char c1, char c2>
    std::string ThreadMask(Operation) {
        return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
    }

    template <typename... Args>
    void AddExpression(std::string_view text, Args&&... args) {
        shader_source += fmt::format(text, std::forward<Args>(args)...);
    }

    template <typename... Args>
    void AddLine(std::string_view text, Args&&... args) {
        AddExpression(text, std::forward<Args>(args)...);
        shader_source += '\n';
    }

    std::string AllocTemporary() {
        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
        return fmt::format("T{}.x", num_temporaries++);
    }

    std::string AllocVectorTemporary() {
        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
        return fmt::format("T{}", num_temporaries++);
    }

    void ResetTemporaries() noexcept {
        num_temporaries = 0;
    }

    const Device& device;
    const ShaderIR& ir;
    const Registry& registry;
    const ShaderType stage;

    std::size_t num_temporaries = 0;
    std::size_t max_temporaries = 0;

    std::string shader_source;

    static constexpr std::string_view ADD_F32 = "ADD.F32";
    static constexpr std::string_view ADD_S = "ADD.S";
    static constexpr std::string_view ADD_U = "ADD.U";
    static constexpr std::string_view MUL_F32 = "MUL.F32";
    static constexpr std::string_view MUL_S = "MUL.S";
    static constexpr std::string_view MUL_U = "MUL.U";
    static constexpr std::string_view DIV_F32 = "DIV.F32";
    static constexpr std::string_view DIV_S = "DIV.S";
    static constexpr std::string_view DIV_U = "DIV.U";
    static constexpr std::string_view MAD_F32 = "MAD.F32";
    static constexpr std::string_view RSQ_F32 = "RSQ.F32";
    static constexpr std::string_view COS_F32 = "COS.F32";
    static constexpr std::string_view SIN_F32 = "SIN.F32";
    static constexpr std::string_view EX2_F32 = "EX2.F32";
    static constexpr std::string_view LG2_F32 = "LG2.F32";
    static constexpr std::string_view SLT_F = "SLT.F32";
    static constexpr std::string_view SLT_S = "SLT.S";
    static constexpr std::string_view SLT_U = "SLT.U";
    static constexpr std::string_view SEQ_F = "SEQ.F32";
    static constexpr std::string_view SEQ_S = "SEQ.S";
    static constexpr std::string_view SEQ_U = "SEQ.U";
    static constexpr std::string_view SLE_F = "SLE.F32";
    static constexpr std::string_view SLE_S = "SLE.S";
    static constexpr std::string_view SLE_U = "SLE.U";
    static constexpr std::string_view SGT_F = "SGT.F32";
    static constexpr std::string_view SGT_S = "SGT.S";
    static constexpr std::string_view SGT_U = "SGT.U";
    static constexpr std::string_view SNE_F = "SNE.F32";
    static constexpr std::string_view SNE_S = "SNE.S";
    static constexpr std::string_view SNE_U = "SNE.U";
    static constexpr std::string_view SGE_F = "SGE.F32";
    static constexpr std::string_view SGE_S = "SGE.S";
    static constexpr std::string_view SGE_U = "SGE.U";
    static constexpr std::string_view AND_S = "AND.S";
    static constexpr std::string_view AND_U = "AND.U";
    static constexpr std::string_view TRUNC_F = "TRUNC.F";
    static constexpr std::string_view TRUNC_S = "TRUNC.S";
    static constexpr std::string_view TRUNC_U = "TRUNC.U";
    static constexpr std::string_view SHL_S = "SHL.S";
    static constexpr std::string_view SHL_U = "SHL.U";
    static constexpr std::string_view SHR_S = "SHR.S";
    static constexpr std::string_view SHR_U = "SHR.U";
    static constexpr std::string_view OR_S = "OR.S";
    static constexpr std::string_view OR_U = "OR.U";
    static constexpr std::string_view XOR_S = "XOR.S";
    static constexpr std::string_view XOR_U = "XOR.U";
    static constexpr std::string_view NOT_S = "NOT.S";
    static constexpr std::string_view NOT_U = "NOT.U";
    static constexpr std::string_view BTC_S = "BTC.S";
    static constexpr std::string_view BTC_U = "BTC.U";
    static constexpr std::string_view BTFM_S = "BTFM.S";
    static constexpr std::string_view BTFM_U = "BTFM.U";
    static constexpr std::string_view ROUND_F = "ROUND.F";
    static constexpr std::string_view CEIL_F = "CEIL.F";
    static constexpr std::string_view FLR_F = "FLR.F";
    static constexpr std::string_view I2F_S = "I2F.S";
    static constexpr std::string_view I2F_U = "I2F.U";
    static constexpr std::string_view MIN_F = "MIN.F";
    static constexpr std::string_view MIN_S = "MIN.S";
    static constexpr std::string_view MIN_U = "MIN.U";
    static constexpr std::string_view MAX_F = "MAX.F";
    static constexpr std::string_view MAX_S = "MAX.S";
    static constexpr std::string_view MAX_U = "MAX.U";
    static constexpr std::string_view MOV_U = "MOV.U";
    static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
    static constexpr std::string_view TGALL_U = "TGALL.U";
    static constexpr std::string_view TGANY_U = "TGANY.U";
    static constexpr std::string_view TGEQ_U = "TGEQ.U";
    static constexpr std::string_view EXCH = "EXCH";
    static constexpr std::string_view ADD = "ADD";
    static constexpr std::string_view MIN = "MIN";
    static constexpr std::string_view MAX = "MAX";
    static constexpr std::string_view AND = "AND";
    static constexpr std::string_view OR = "OR";
    static constexpr std::string_view XOR = "XOR";
    static constexpr std::string_view U32 = "U32";
    static constexpr std::string_view S32 = "S32";

    static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
    using DecompilerType = std::string (ARBDecompiler::*)(Operation);
    static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
        &ARBDecompiler::Assign,

        &ARBDecompiler::Select,

        &ARBDecompiler::Binary<ADD_F32>,
        &ARBDecompiler::Binary<MUL_F32>,
        &ARBDecompiler::Binary<DIV_F32>,
        &ARBDecompiler::Trinary<MAD_F32>,
        &ARBDecompiler::Negate<'F'>,
        &ARBDecompiler::Absolute<'F'>,
        &ARBDecompiler::FClamp,
        &ARBDecompiler::FCastHalf0,
        &ARBDecompiler::FCastHalf1,
        &ARBDecompiler::Binary<MIN_F>,
        &ARBDecompiler::Binary<MAX_F>,
        &ARBDecompiler::Unary<COS_F32>,
        &ARBDecompiler::Unary<SIN_F32>,
        &ARBDecompiler::Unary<EX2_F32>,
        &ARBDecompiler::Unary<LG2_F32>,
        &ARBDecompiler::Unary<RSQ_F32>,
        &ARBDecompiler::FSqrt,
        &ARBDecompiler::Unary<ROUND_F>,
        &ARBDecompiler::Unary<FLR_F>,
        &ARBDecompiler::Unary<CEIL_F>,
        &ARBDecompiler::Unary<TRUNC_F>,
        &ARBDecompiler::Unary<I2F_S>,
        &ARBDecompiler::Unary<I2F_U>,
        &ARBDecompiler::FSwizzleAdd,

        &ARBDecompiler::Binary<ADD_S>,
        &ARBDecompiler::Binary<MUL_S>,
        &ARBDecompiler::Binary<DIV_S>,
        &ARBDecompiler::Negate<'S'>,
        &ARBDecompiler::Absolute<'S'>,
        &ARBDecompiler::Binary<MIN_S>,
        &ARBDecompiler::Binary<MAX_S>,

        &ARBDecompiler::Unary<TRUNC_S>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_S>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_S>,
        &ARBDecompiler::Binary<AND_S>,
        &ARBDecompiler::Binary<OR_S>,
        &ARBDecompiler::Binary<XOR_S>,
        &ARBDecompiler::Unary<NOT_S>,
        &ARBDecompiler::BitfieldInsert<'S'>,
        &ARBDecompiler::BitfieldExtract<'S'>,
        &ARBDecompiler::Unary<BTC_S>,
        &ARBDecompiler::Unary<BTFM_S>,

        &ARBDecompiler::Binary<ADD_U>,
        &ARBDecompiler::Binary<MUL_U>,
        &ARBDecompiler::Binary<DIV_U>,
        &ARBDecompiler::Binary<MIN_U>,
        &ARBDecompiler::Binary<MAX_U>,
        &ARBDecompiler::Unary<TRUNC_U>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::BitfieldInsert<'U'>,
        &ARBDecompiler::BitfieldExtract<'U'>,
        &ARBDecompiler::Unary<BTC_U>,
        &ARBDecompiler::Unary<BTFM_U>,

        &ARBDecompiler::HAdd2,
        &ARBDecompiler::HMul2,
        &ARBDecompiler::HFma2,
        &ARBDecompiler::HAbsolute,
        &ARBDecompiler::HNegate,
        &ARBDecompiler::HClamp,
        &ARBDecompiler::HCastFloat,
        &ARBDecompiler::HUnpack,
        &ARBDecompiler::HMergeF32,
        &ARBDecompiler::HMergeH0,
        &ARBDecompiler::HMergeH1,
        &ARBDecompiler::HPack2,

        &ARBDecompiler::LogicalAssign,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::LogicalPick2,
        &ARBDecompiler::LogicalAnd2,

        &ARBDecompiler::FloatComparison<SLT_F, false>,
        &ARBDecompiler::FloatComparison<SEQ_F, false>,
        &ARBDecompiler::FloatComparison<SLE_F, false>,
        &ARBDecompiler::FloatComparison<SGT_F, false>,
        &ARBDecompiler::FloatComparison<SNE_F, false>,
        &ARBDecompiler::FloatComparison<SGE_F, false>,
        &ARBDecompiler::FloatOrdered,
        &ARBDecompiler::FloatUnordered,
        &ARBDecompiler::FloatComparison<SLT_F, true>,
        &ARBDecompiler::FloatComparison<SEQ_F, true>,
        &ARBDecompiler::FloatComparison<SLE_F, true>,
        &ARBDecompiler::FloatComparison<SGT_F, true>,
        &ARBDecompiler::FloatComparison<SNE_F, true>,
        &ARBDecompiler::FloatComparison<SGE_F, true>,

        &ARBDecompiler::Binary<SLT_S>,
        &ARBDecompiler::Binary<SEQ_S>,
        &ARBDecompiler::Binary<SLE_S>,
        &ARBDecompiler::Binary<SGT_S>,
        &ARBDecompiler::Binary<SNE_S>,
        &ARBDecompiler::Binary<SGE_S>,

        &ARBDecompiler::Binary<SLT_U>,
        &ARBDecompiler::Binary<SEQ_U>,
        &ARBDecompiler::Binary<SLE_U>,
        &ARBDecompiler::Binary<SGT_U>,
        &ARBDecompiler::Binary<SNE_U>,
        &ARBDecompiler::Binary<SGE_U>,

        &ARBDecompiler::LogicalAddCarry,

        &ARBDecompiler::HalfComparison<SLT_F, false>,
        &ARBDecompiler::HalfComparison<SEQ_F, false>,
        &ARBDecompiler::HalfComparison<SLE_F, false>,
        &ARBDecompiler::HalfComparison<SGT_F, false>,
        &ARBDecompiler::HalfComparison<SNE_F, false>,
        &ARBDecompiler::HalfComparison<SGE_F, false>,
        &ARBDecompiler::HalfComparison<SLT_F, true>,
        &ARBDecompiler::HalfComparison<SEQ_F, true>,
        &ARBDecompiler::HalfComparison<SLE_F, true>,
        &ARBDecompiler::HalfComparison<SGT_F, true>,
        &ARBDecompiler::HalfComparison<SNE_F, true>,
        &ARBDecompiler::HalfComparison<SGE_F, true>,

        &ARBDecompiler::Texture,
        &ARBDecompiler::Texture,
        &ARBDecompiler::TextureGather,
        &ARBDecompiler::TextureQueryDimensions,
        &ARBDecompiler::TextureQueryLod,
        &ARBDecompiler::TexelFetch,
        &ARBDecompiler::TextureGradient,

        &ARBDecompiler::ImageLoad,
        &ARBDecompiler::ImageStore,

        &ARBDecompiler::AtomicImage<ADD, U32>,
        &ARBDecompiler::AtomicImage<AND, U32>,
        &ARBDecompiler::AtomicImage<OR, U32>,
        &ARBDecompiler::AtomicImage<XOR, U32>,
        &ARBDecompiler::AtomicImage<EXCH, U32>,

        &ARBDecompiler::Atomic<EXCH, U32>,
        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<EXCH, S32>,
        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        &ARBDecompiler::Branch,
        &ARBDecompiler::BranchIndirect,
        &ARBDecompiler::PushFlowStack,
        &ARBDecompiler::PopFlowStack,
        &ARBDecompiler::Exit,
        &ARBDecompiler::Discard,

        &ARBDecompiler::EmitVertex,
        &ARBDecompiler::EndPrimitive,

        &ARBDecompiler::InvocationId,
        &ARBDecompiler::YNegate,
        &ARBDecompiler::LocalInvocationId<'x'>,
        &ARBDecompiler::LocalInvocationId<'y'>,
        &ARBDecompiler::LocalInvocationId<'z'>,
        &ARBDecompiler::WorkGroupId<'x'>,
        &ARBDecompiler::WorkGroupId<'y'>,
        &ARBDecompiler::WorkGroupId<'z'>,

        &ARBDecompiler::Unary<TGBALLOT_U>,
        &ARBDecompiler::Unary<TGALL_U>,
        &ARBDecompiler::Unary<TGANY_U>,
        &ARBDecompiler::Unary<TGEQ_U>,

        &ARBDecompiler::ThreadId,
        &ARBDecompiler::ThreadMask<'e', 'q'>,
        &ARBDecompiler::ThreadMask<'g', 'e'>,
        &ARBDecompiler::ThreadMask<'g', 't'>,
        &ARBDecompiler::ThreadMask<'l', 'e'>,
        &ARBDecompiler::ThreadMask<'l', 't'>,
        &ARBDecompiler::ShuffleIndexed,

        &ARBDecompiler::Barrier,
        &ARBDecompiler::MemoryBarrierGroup,
        &ARBDecompiler::MemoryBarrierGlobal,
    };
};

ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                             ShaderType stage, std::string_view identifier)
    : device{device}, ir{ir}, registry{registry}, stage{stage} {
    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    shader_source += code;
}

std::string_view HeaderStageName(ShaderType stage) {
    switch (stage) {
    case ShaderType::Vertex:
        return "vp";
    case ShaderType::Geometry:
        return "gp";
    case ShaderType::Fragment:
        return "fp";
    case ShaderType::Compute:
        return "cp";
    default:
        UNREACHABLE();
        return "";
    }
}

void ARBDecompiler::DeclareHeader() {
    AddLine("!!NV{}5.0", HeaderStageName(stage));
    // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
    AddLine("OPTION NV_internal;");
    AddLine("OPTION NV_gpu_program_fp64;");
    AddLine("OPTION NV_shader_storage_buffer;");
    AddLine("OPTION NV_shader_thread_group;");
    if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
        AddLine("OPTION NV_shader_thread_shuffle;");
    }
    if (stage == ShaderType::Vertex) {
        if (device.HasNvViewportArray2()) {
            AddLine("OPTION NV_viewport_array2;");
        }
    }
    if (stage == ShaderType::Fragment) {
        AddLine("OPTION ARB_draw_buffers;");
    }
    if (device.HasImageLoadFormatted()) {
        AddLine("OPTION EXT_shader_image_load_formatted;");
    }
}

void ARBDecompiler::DeclareVertex() {
    if (stage != ShaderType::Vertex) {
        return;
    }
    AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
}

void ARBDecompiler::DeclareGeometry() {
    if (stage != ShaderType::Geometry) {
        return;
    }
    const auto& info = registry.GetGraphicsInfo();
    const auto& header = ir.GetHeader();
    AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
    AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
    AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
    AddLine("ATTRIB vertex_position = vertex.position;");
}

void ARBDecompiler::DeclareFragment() {
    if (stage != ShaderType::Fragment) {
        return;
    }
    AddLine("OUTPUT result_color7 = result.color[7];");
    AddLine("OUTPUT result_color6 = result.color[6];");
    AddLine("OUTPUT result_color5 = result.color[5];");
    AddLine("OUTPUT result_color4 = result.color[4];");
    AddLine("OUTPUT result_color3 = result.color[3];");
    AddLine("OUTPUT result_color2 = result.color[2];");
    AddLine("OUTPUT result_color1 = result.color[1];");
    AddLine("OUTPUT result_color0 = result.color;");
}

void ARBDecompiler::DeclareCompute() {
    if (stage != ShaderType::Compute) {
        return;
    }
    const ComputeInfo& info = registry.GetComputeInfo();
    AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
            info.workgroup_size[2]);
    if (info.shared_memory_size_in_words > 0) {
        const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
        AddLine("SHARED_MEMORY {};", size_in_bytes);
        AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
    }
}

void ARBDecompiler::DeclareInputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    const std::string_view stage_name = StageInputName(stage);
    for (const auto attribute : ir.GetInputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);

        std::string_view suffix;
        if (stage == ShaderType::Fragment) {
            const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
            if (input_mode == PixelImap::Unused) {
                return;
            }
            suffix = GetInputFlags(input_mode);
        }
        AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
                index);
    }
}

void ARBDecompiler::DeclareOutputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
    }
}

void ARBDecompiler::DeclareLocalMemory() {
    u64 size = 0;
    if (stage == ShaderType::Compute) {
        size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
    } else {
        size = ir.GetHeader().GetLocalMemorySize();
    }
    if (size == 0) {
        return;
    }
    const u64 element_count = Common::AlignUp(size, 4) / 4;
    AddLine("TEMP lmem[{}];", element_count);
}

void ARBDecompiler::DeclareGlobalMemory() {
    u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
    for (const auto& pair : ir.GetGlobalMemory()) {
        const auto& base = pair.first;
        AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareConstantBuffers() {
    u32 binding = 0;
    for (const auto& cbuf : ir.GetConstantBuffers()) {
        AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareRegisters() {
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("TEMP R{};", gpr);
    }
}

void ARBDecompiler::DeclareTemporaries() {
    for (std::size_t i = 0; i < max_temporaries; ++i) {
        AddLine("TEMP T{};", i);
    }
}

void ARBDecompiler::DeclarePredicates() {
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("TEMP P{};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DeclareInternalFlags() {
    for (const char* name : INTERNAL_FLAG_NAMES) {
        AddLine("TEMP {};", name);
    }
}

void ARBDecompiler::InitializeVariables() {
    AddLine("MOV.F32 FSWZA[0], -1;");
    AddLine("MOV.F32 FSWZA[1], 1;");
    AddLine("MOV.F32 FSWZA[2], -1;");
    AddLine("MOV.F32 FSWZA[3], 0;");
    AddLine("MOV.F32 FSWZB[0], -1;");
    AddLine("MOV.F32 FSWZB[1], -1;");
    AddLine("MOV.F32 FSWZB[2], 1;");
    AddLine("MOV.F32 FSWZB[3], -1;");

    if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
        AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
    }
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
    }
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DecompileAST() {
    const u32 num_flow_variables = ir.GetASTNumVariables();
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("TEMP F{};", i);
    }
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
    }

    InitializeVariables();

    VisitAST(ir.GetASTProgram());
}

void ARBDecompiler::DecompileBranchMode() {
    static constexpr u32 FLOW_STACK_SIZE = 20;
    if (!ir.IsFlowStackDisabled()) {
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        AddLine("ELSE;");
    }
    AddLine("RET;");
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}

void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(ast->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        AddLine("ELSE;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(ast->nodes);
    } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
        ResetTemporaries();
    } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(ast->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        const bool is_true = ExprIsTrue(ast->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}

std::string ARBDecompiler::VisitExpression(const Expr& node) {
    if (const auto expr = std::get_if<ExprAnd>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprOr>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprNot>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
        return result;
    }
    if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
        return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
    }
    if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
        return Visit(ir.GetConditionCode(expr->cc));
    }
    if (const auto expr = std::get_if<ExprVar>(&*node)) {
        return fmt::format("F{}.x", expr->var_index);
    }
    if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
        return expr->value ? "0xffffffff" : "0";
    }
    if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
        return result;
    }
    UNREACHABLE();
    return "0";
}

void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
    for (const auto& node : bb) {
        Visit(node);
    }
}

std::string ARBDecompiler::Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        const auto buffer_index = [this, &abuf]() -> std::string {
            if (stage != ShaderType::Geometry) {
                return "";
            }
            return fmt::format("[{}]", Visit(abuf->GetBuffer()));
        };

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
            break;
        }
        return "{0, 0, 0, 0}.x";
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
                Visit(gmem->GetBaseAddress()));
        AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
                temporary);
        return temporary;
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if (const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}

std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    UNIMPLEMENTED_IF(meta.sampler.is_indexed);
    UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
                     meta.sampler.type == Tegra::Shader::TextureType::TextureCube);

    const std::size_t count = operation.GetOperandsCount();
    std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    if (meta.sampler.is_array) {
        AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
    }
    if (meta.sampler.is_shadow) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
    }
    return {std::move(temporary), i};
}

std::string ARBDecompiler::BuildAoffi(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    if (meta.aoffi.empty()) {
        return {};
    }
    const std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (auto& node : meta.aoffi) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
    }
    return fmt::format(", offset({})", temporary);
}

void ARBDecompiler::Exit() {
    if (stage != ShaderType::Fragment) {
        AddLine("RET;");
        return;
    }

    const auto safe_get_register = [this](u32 reg) -> std::string {
        // TODO(Rodrigo): Replace with contains once C++20 releases
        const auto& used_registers = ir.GetRegisters();
        if (used_registers.find(reg) != used_registers.end()) {
            return fmt::format("R{}.x", reg);
        }
        return "{0, 0, 0, 0}.x";
    };

    const auto& header = ir.GetHeader();
    u32 current_reg = 0;
    for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
        for (u32 component = 0; component < 4; ++component) {
            if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
                continue;
            }
            AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
                    safe_get_register(current_reg));
            ++current_reg;
        }
    }
    if (header.ps.omap.depth) {
        AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
    }

    AddLine("RET;");
}

std::string ARBDecompiler::Assign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string dest_name;
    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            // Writing to Register::ZeroIndex is a no op
            return {};
        }
        dest_name = fmt::format("R{}.x", gpr->GetIndex());
    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (const Attribute::Index index = abuf->GetIndex()) {
        case Attribute::Index::Position:
            dest_name = fmt::format("result.position.{}", swizzle);
            break;
        case Attribute::Index::LayerViewportPointSize:
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return {};
            case 1:
            case 2:
                if (!device.HasNvViewportArray2()) {
                    LOG_ERROR(
                        Render_OpenGL,
                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
                    return {};
                }
                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
                break;
            case 3:
                dest_name = "result.pointsize.x";
                break;
            }
            break;
        case Attribute::Index::ClipDistances0123:
            dest_name = fmt::format("result.clip[{}].x", element);
            break;
        case Attribute::Index::ClipDistances4567:
            dest_name = fmt::format("result.clip[{}].x", element + 4);
            break;
        default:
            if (!IsGenericAttribute(index)) {
                UNREACHABLE();
                return {};
            }
            dest_name =
                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
            break;
        }
    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
        const std::string address = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", address, address);
        dest_name = fmt::format("lmem[{}].x", address);
    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
        ResetTemporaries();
        return {};
    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
        const std::string temporary = AllocTemporary();
        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
                Visit(gmem->GetBaseAddress()));
        AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
                temporary);
        ResetTemporaries();
        return {};
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", dest_name, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::Select(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
            Visit(operation[2]));
    return temporary;
}

std::string ARBDecompiler::FClamp(Operation operation) {
    // 1.0f in hex, replace with std::bit_cast on C++20
    static constexpr u32 POSITIVE_ONE = 0x3f800000;

    std::string temporary = AllocTemporary();
    const Node& value = operation[0];
    const Node& low = operation[1];
    const Node& high = operation[2];
    const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
    const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
    if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
        AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
    } else {
        AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
        AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
    }
    return temporary;
}

std::string ARBDecompiler::FCastHalf0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FCastHalf1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
    AddLine("MOV {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FSqrt(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
    AddLine("RCP.F32 {}, {};", temporary, temporary);
    return temporary;
}

std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
        return fmt::format("{}.x", temporary);
    }

    AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
    AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
    AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
    AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
    AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
    AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
    AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HAdd2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HMul2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HFma2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    const std::string tmp3 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
    AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HAbsolute(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HNegate(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
    AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
    AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
    AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HClamp(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HCastFloat(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
    AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HUnpack(Operation operation) {
    const std::string operand = Visit(operation[0]);
    switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
    case Tegra::Shader::HalfType::H0_H1:
        return operand;
    case Tegra::Shader::HalfType::F32: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.U {}.x, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H0_H0: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H1_H1: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    }
    UNREACHABLE();
    return "{0, 0, 0, 0}.x";
}

std::string ARBDecompiler::HMergeF32(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HPack2(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
    AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::LogicalAssign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string target;

    if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
        ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");

        const Tegra::Shader::Pred index = pred->GetIndex();
        switch (index) {
        case Tegra::Shader::Pred::NeverExecute:
        case Tegra::Shader::Pred::UnusedIndex:
            // Writing to these predicates is a no-op
            return {};
        }
        target = fmt::format("P{}.x", static_cast<u64>(index));
    } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", target, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::LogicalPick2(Operation operation) {
    std::string temporary = AllocTemporary();
    const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
    AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
    return temporary;
}

std::string ARBDecompiler::LogicalAnd2(Operation operation) {
    std::string temporary = AllocTemporary();
    const std::string op = Visit(operation[0]);
    AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
    return temporary;
}

std::string ARBDecompiler::FloatOrdered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("MOV.S {} (NAN.x), 0;", temporary);
    AddLine("MOV.S {} (NAN.y), 0;", temporary);
    return temporary;
}

std::string ARBDecompiler::FloatUnordered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("MOV.S {} (NAN.x), -1;", temporary);
    AddLine("MOV.S {} (NAN.y), -1;", temporary);
    return temporary;
}

std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("IF CF.x;");
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("ENDIF;");
    return temporary;
}

std::string ARBDecompiler::Texture(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    std::string_view opcode = "TEX";
    std::string extra;
    if (meta.bias) {
        ASSERT(!meta.lod);
        opcode = "TXB";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
        } else {
            const std::string bias = AllocTemporary();
            AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
            extra = fmt::format(" {},", bias);
        }
    }
    if (meta.lod) {
        ASSERT(!meta.bias);
        opcode = "TXL";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
        } else {
            const std::string lod = AllocTemporary();
            AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
            extra = fmt::format(" {},", lod);
        }
    }

    AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureGather(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    std::string comp;
    if (!meta.sampler.is_shadow) {
        const auto& immediate = std::get<ImmediateNode>(*meta.component);
        comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
    }

    AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
    AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryLod(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::size_t count = operation.GetOperandsCount();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
    AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
    AddLine("TRUNC.S {}, {};", temporary, temporary);
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TexelFetch(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    if (!meta.sampler.is_buffer) {
        ASSERT(swizzle < 4);
        AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
    }
    AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
            BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureGradient(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const std::string ddx = AllocVectorTemporary();
    const std::string ddy = AllocVectorTemporary();
    const std::string coord = BuildCoords(operation).first;

    const std::size_t num_components = meta.derivates.size() / 2;
    for (std::size_t index = 0; index < num_components; ++index) {
        const char swizzle = Swizzle(index);
        AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
        AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
    }

    const std::string_view result = coord;
    AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
    return fmt::format("{}.x", result);
}

std::string ARBDecompiler::ImageLoad(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t count = operation.GetOperandsCount();
    const std::string_view type = ImageType(meta.image.type);

    const std::string temporary = AllocVectorTemporary();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
    AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::ImageStore(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t num_coords = operation.GetOperandsCount();
    const std::size_t num_values = meta.values.size();
    const std::string_view type = ImageType(meta.image.type);

    const std::string coord = AllocVectorTemporary();
    const std::string value = AllocVectorTemporary();
    for (std::size_t i = 0; i < num_coords; ++i) {
        AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
    }
    for (std::size_t i = 0; i < num_values; ++i) {
        AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
    }
    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
    return {};
}

std::string ARBDecompiler::Branch(Operation operation) {
    const auto target = std::get<ImmediateNode>(*operation[0]);
    AddLine("MOV.U PC.x, {};", target.GetValue());
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::BranchIndirect(Operation operation) {
    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::PushFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
    const std::string_view stack_name = StackName(stack);
    AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
    AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    return {};
}

std::string ARBDecompiler::PopFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const std::string_view stack_name = StackName(stack);
    AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::Exit(Operation) {
    Exit();
    return {};
}

std::string ARBDecompiler::Discard(Operation) {
    AddLine("KIL TR;");
    return {};
}

std::string ARBDecompiler::EmitVertex(Operation) {
    AddLine("EMIT;");
    return {};
}

std::string ARBDecompiler::EndPrimitive(Operation) {
    AddLine("ENDPRIM;");
    return {};
}

std::string ARBDecompiler::InvocationId(Operation) {
    return "primitive.invocation";
}

std::string ARBDecompiler::YNegate(Operation) {
    LOG_WARNING(Render_OpenGL, "(STUBBED)");
    const std::string temporary = AllocTemporary();
    AddLine("MOV.F {}, 1;", temporary);
    return temporary;
}

std::string ARBDecompiler::ThreadId(Operation) {
    return fmt::format("{}.threadid", StageInputName(stage));
}

std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        return Visit(operation[0]);
    }
    const std::string temporary = AllocVectorTemporary();
    AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
            Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::Barrier(Operation) {
    if (!ir.IsDecompiled()) {
        LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
        return {};
    }
    AddLine("BAR;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
    AddLine("MEMBAR.CTA;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
    AddLine("MEMBAR;");
    return {};
}

} // Anonymous namespace

std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
    return ARBDecompiler(device, ir, registry, stage, identifier).Code();
}

} // namespace OpenGL