summaryrefslogblamecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
blob: 3e4d88c302d91b8485979d63e3427b93a720081e (plain) (tree)








































                                                                                                   

                                                      





























                                                                                             
                                                                     


















































                                                                                                    
                                                   












                                                                       
                                                                   














































                                                     

                           

                                                                                                 





                              

                              


























                                                  
                                                                             
                                      
                                                          




















































                                                  
                                                 





                                                                                          
                                                 






                                                                                             
                                                 






                                                                                                 
                                                 

























                                                               
                                                  


































                                                                                    
                                                                                           
                                                      
                                          



                                                                       
                                                 

                                
                            
                                                                      

                                                 
                          






                                                                               


                                
                                                                                                   




                                               




                                             
                                                 









                                                                             
                                                 
















































                                                                                            






                                                                                        






                                                                         



                                                           

                                      
                                 









                                    




                                                        














































































































































































































































































































                                                                                               


                                                                                                   

                         
                        

                              









































                                                      








                                                   




                                                                                        
























































                                                                                   








                                                                                       
     


                                                            





















































                                                                                                    
                                                           
                          
                                                                                           




















                                                                                     


                                                            














                                                               








                                     




























































































                                                                                              

                                                                                     



                                               
                                                                                                  


                              
                                                                                     
                         
                                                                                                  

                              



                                                                                           
                           

                                                                                       

                           
                                                                                                   




                                               

                                                                                        
                       
                                                                                


                                
                                






                               

                                                                                      

                            
                                                                               










                                                                         
                                                         
                                              




                                                                             
                                              




                                                                            
                                              















                                                                                 
                                              











































                                                                                 
                                                 




                                                                    
                                                 






















                                                                            


















































                                                                                              
                                                                         












                                                                    
                                                 




                                                                                       
                                                 

                                                                                  



                                                          
                                                          





                                                               
                                                          



















                                                                                     
                                                                             








                                                                              
                                                                                                   

                                                                  
 

                                                                                          






                                                                                

                                                                              

                                 








                                                                                   
     
                                     














                                                                          
                                                                      


                                                                      
                                                                     



                                                           
                                                   


                                                                         


                                                                    


                                        






                                                                   
                                              





















































































                                                                                               
                            
                                                                             
                          













                                                        
                                             








                                                                                         
                                             


                                     

                                                                    






















                                                                                                  
                                             





                                                               






                                                                                            
 







                                                                                         










































































                                                                 
                                              




























































































                                                                                     
                                             





                                                                                
                                             





                                                              
                                             








                                                                
                                             








                                                                 
                                             










                                                                                      
                                                                     



























                                                                    
                                                                                              







                                                                                      
                                                                     








                                                                                         
                                                                            




































                                                                                                   
                                                                     




                                                               
                                                                                                 









                                                                                      
                                                                  









































































































                                                                                            
                                             





















                                                                                        






















                                                                                                    
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

#include <fmt/format.h>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"

// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
// GLASM lacks booleans, so they have to be implemented as integers.
// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
// select between two values, because -1 will be evaluated as true and 0 as false.

namespace OpenGL {

namespace {

using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using namespace VideoCommon::Shader;
using Operation = const OperationNode&;

constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};

char Swizzle(std::size_t component) {
    static constexpr std::string_view SWIZZLE{"xyzw"};
    return SWIZZLE.at(component);
}

constexpr bool IsGenericAttribute(Attribute::Index index) {
    return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}

u32 GetGenericAttributeIndex(Attribute::Index index) {
    ASSERT(IsGenericAttribute(index));
    return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
}

std::string_view Modifiers(Operation operation) {
    const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
    if (meta && meta->precise) {
        return ".PREC";
    }
    return "";
}

std::string_view GetInputFlags(PixelImap attribute) {
    switch (attribute) {
    case PixelImap::Perspective:
        return "";
    case PixelImap::Constant:
        return "FLAT ";
    case PixelImap::ScreenLinear:
        return "NOPERSPECTIVE ";
    case PixelImap::Unused:
        break;
    }
    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
    return {};
}

std::string_view ImageType(Tegra::Shader::ImageType image_type) {
    switch (image_type) {
    case Tegra::Shader::ImageType::Texture1D:
        return "1D";
    case Tegra::Shader::ImageType::TextureBuffer:
        return "BUFFER";
    case Tegra::Shader::ImageType::Texture1DArray:
        return "ARRAY1D";
    case Tegra::Shader::ImageType::Texture2D:
        return "2D";
    case Tegra::Shader::ImageType::Texture2DArray:
        return "ARRAY2D";
    case Tegra::Shader::ImageType::Texture3D:
        return "3D";
    }
    UNREACHABLE();
    return {};
}

std::string_view StackName(MetaStackClass stack) {
    switch (stack) {
    case MetaStackClass::Ssy:
        return "SSY";
    case MetaStackClass::Pbk:
        return "PBK";
    }
    UNREACHABLE();
    return "";
};

std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
    switch (topology) {
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
        return "POINTS";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
        return "LINES";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
        return "LINES_ADJACENCY";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
        return "TRIANGLES";
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
        return "TRIANGLES_ADJACENCY";
    default:
        UNIMPLEMENTED_MSG("topology={}", topology);
        return "POINTS";
    }
}

std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
    switch (topology) {
    case Tegra::Shader::OutputTopology::PointList:
        return "POINTS";
    case Tegra::Shader::OutputTopology::LineStrip:
        return "LINE_STRIP";
    case Tegra::Shader::OutputTopology::TriangleStrip:
        return "TRIANGLE_STRIP";
    default:
        UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
        return "points";
    }
}

std::string_view StageInputName(ShaderType stage) {
    switch (stage) {
    case ShaderType::Vertex:
    case ShaderType::Geometry:
        return "vertex";
    case ShaderType::Fragment:
        return "fragment";
    case ShaderType::Compute:
        return "invocation";
    default:
        UNREACHABLE();
        return "";
    }
}

std::string TextureType(const MetaTexture& meta) {
    if (meta.sampler.is_buffer) {
        return "BUFFER";
    }
    std::string type;
    if (meta.sampler.is_shadow) {
        type += "SHADOW";
    }
    if (meta.sampler.is_array) {
        type += "ARRAY";
    }
    type += [&meta] {
        switch (meta.sampler.type) {
        case Tegra::Shader::TextureType::Texture1D:
            return "1D";
        case Tegra::Shader::TextureType::Texture2D:
            return "2D";
        case Tegra::Shader::TextureType::Texture3D:
            return "3D";
        case Tegra::Shader::TextureType::TextureCube:
            return "CUBE";
        }
        UNREACHABLE();
        return "2D";
    }();
    return type;
}

class ARBDecompiler final {
public:
    explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                           ShaderType stage_, std::string_view identifier);

    std::string Code() const {
        return shader_source;
    }

private:
    void DefineGlobalMemory();

    void DeclareHeader();
    void DeclareVertex();
    void DeclareGeometry();
    void DeclareFragment();
    void DeclareCompute();
    void DeclareInputAttributes();
    void DeclareOutputAttributes();
    void DeclareLocalMemory();
    void DeclareGlobalMemory();
    void DeclareConstantBuffers();
    void DeclareRegisters();
    void DeclareTemporaries();
    void DeclarePredicates();
    void DeclareInternalFlags();

    void InitializeVariables();

    void DecompileAST();
    void DecompileBranchMode();

    void VisitAST(const ASTNode& node);
    std::string VisitExpression(const Expr& node);

    void VisitBlock(const NodeBlock& bb);

    std::string Visit(const Node& node);

    std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
    std::string BuildAoffi(Operation);
    std::string GlobalMemoryPointer(const GmemNode& gmem);
    void Exit();

    std::string Assign(Operation);
    std::string Select(Operation);
    std::string FClamp(Operation);
    std::string FCastHalf0(Operation);
    std::string FCastHalf1(Operation);
    std::string FSqrt(Operation);
    std::string FSwizzleAdd(Operation);
    std::string HAdd2(Operation);
    std::string HMul2(Operation);
    std::string HFma2(Operation);
    std::string HAbsolute(Operation);
    std::string HNegate(Operation);
    std::string HClamp(Operation);
    std::string HCastFloat(Operation);
    std::string HUnpack(Operation);
    std::string HMergeF32(Operation);
    std::string HMergeH0(Operation);
    std::string HMergeH1(Operation);
    std::string HPack2(Operation);
    std::string LogicalAssign(Operation);
    std::string LogicalPick2(Operation);
    std::string LogicalAnd2(Operation);
    std::string FloatOrdered(Operation);
    std::string FloatUnordered(Operation);
    std::string LogicalAddCarry(Operation);
    std::string Texture(Operation);
    std::string TextureGather(Operation);
    std::string TextureQueryDimensions(Operation);
    std::string TextureQueryLod(Operation);
    std::string TexelFetch(Operation);
    std::string TextureGradient(Operation);
    std::string ImageLoad(Operation);
    std::string ImageStore(Operation);
    std::string Branch(Operation);
    std::string BranchIndirect(Operation);
    std::string PushFlowStack(Operation);
    std::string PopFlowStack(Operation);
    std::string Exit(Operation);
    std::string Discard(Operation);
    std::string EmitVertex(Operation);
    std::string EndPrimitive(Operation);
    std::string InvocationId(Operation);
    std::string YNegate(Operation);
    std::string ThreadId(Operation);
    std::string ShuffleIndexed(Operation);
    std::string Barrier(Operation);
    std::string MemoryBarrierGroup(Operation);
    std::string MemoryBarrierGlobal(Operation);

    template <const std::string_view& op>
    std::string Unary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
        return temporary;
    }

    template <const std::string_view& op>
    std::string Binary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]));
        return temporary;
    }

    template <const std::string_view& op>
    std::string Trinary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]), Visit(operation[2]));
        return temporary;
    }

    template <const std::string_view& op, bool unordered>
    std::string FloatComparison(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
        AddLine("MOV.S {}, 0;", temporary);
        AddLine("MOV.S {} (NE.x), -1;", temporary);

        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        if constexpr (unordered) {
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
        } else if (op == SNE_F) {
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
        }
        return temporary;
    }

    template <const std::string_view& op, bool is_nan>
    std::string HalfComparison(Operation operation) {
        std::string tmp1 = AllocVectorTemporary();
        const std::string tmp2 = AllocVectorTemporary();
        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        AddLine("UP2H.F {}, {};", tmp1, op_a);
        AddLine("UP2H.F {}, {};", tmp2, op_b);
        AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
        AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
        AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
        AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
        AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
        if constexpr (is_nan) {
            AddLine("MOVC.F RC.x, {};", op_a);
            AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
            AddLine("MOVC.F RC.x, {};", op_b);
            AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
        }
        return tmp1;
    }

    template <const std::string_view& op, const std::string_view& type>
    std::string AtomicImage(Operation operation) {
        const auto& meta = std::get<MetaImage>(operation.GetMeta());
        const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
        const std::size_t num_coords = operation.GetOperandsCount();
        const std::size_t num_values = meta.values.size();

        const std::string coord = AllocVectorTemporary();
        const std::string value = AllocVectorTemporary();
        for (std::size_t i = 0; i < num_coords; ++i) {
            AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
        }
        for (std::size_t i = 0; i < num_values; ++i) {
            AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
        }

        AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
                image_id, ImageType(meta.image.type));
        return fmt::format("{}.x", coord);
    }

    template <const std::string_view& op, const std::string_view& type>
    std::string Atomic(Operation operation) {
        std::string temporary = AllocTemporary();
        std::string address;
        std::string_view opname;
        bool robust = false;
        if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
            address = GlobalMemoryPointer(*gmem);
            opname = "ATOM";
            robust = true;
        } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
            address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
            opname = "ATOMS";
        } else {
            UNREACHABLE();
            return "{0, 0, 0, 0}";
        }
        if (robust) {
            AddLine("IF NE.x;");
        }
        AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
        if (robust) {
            AddLine("ELSE;");
            AddLine("MOV.S {}, 0;", temporary);
            AddLine("ENDIF;");
        }
        return temporary;
    }

    template <char type>
    std::string Negate(Operation operation) {
        std::string temporary = AllocTemporary();
        if constexpr (type == 'F') {
            AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
        } else {
            AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
        }
        return temporary;
    }

    template <char type>
    std::string Absolute(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
        return temporary;
    }

    template <char type>
    std::string BitfieldInsert(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
        AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
                Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    template <char type>
    std::string BitfieldExtract(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
        AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    template <char swizzle>
    std::string LocalInvocationId(Operation) {
        return fmt::format("invocation.localid.{}", swizzle);
    }

    template <char swizzle>
    std::string WorkGroupId(Operation) {
        return fmt::format("invocation.groupid.{}", swizzle);
    }

    template <char c1, char c2>
    std::string ThreadMask(Operation) {
        return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
    }

    template <typename... Args>
    void AddExpression(std::string_view text, Args&&... args) {
        shader_source += fmt::format(text, std::forward<Args>(args)...);
    }

    template <typename... Args>
    void AddLine(std::string_view text, Args&&... args) {
        AddExpression(text, std::forward<Args>(args)...);
        shader_source += '\n';
    }

    std::string AllocLongVectorTemporary() {
        max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
        return fmt::format("L{}", num_long_temporaries++);
    }

    std::string AllocLongTemporary() {
        return fmt::format("{}.x", AllocLongVectorTemporary());
    }

    std::string AllocVectorTemporary() {
        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
        return fmt::format("T{}", num_temporaries++);
    }

    std::string AllocTemporary() {
        return fmt::format("{}.x", AllocVectorTemporary());
    }

    void ResetTemporaries() noexcept {
        num_temporaries = 0;
        num_long_temporaries = 0;
    }

    const Device& device;
    const ShaderIR& ir;
    const Registry& registry;
    const ShaderType stage;

    std::size_t num_temporaries = 0;
    std::size_t max_temporaries = 0;

    std::size_t num_long_temporaries = 0;
    std::size_t max_long_temporaries = 0;

    std::map<GlobalMemoryBase, u32> global_memory_names;

    std::string shader_source;

    static constexpr std::string_view ADD_F32 = "ADD.F32";
    static constexpr std::string_view ADD_S = "ADD.S";
    static constexpr std::string_view ADD_U = "ADD.U";
    static constexpr std::string_view MUL_F32 = "MUL.F32";
    static constexpr std::string_view MUL_S = "MUL.S";
    static constexpr std::string_view MUL_U = "MUL.U";
    static constexpr std::string_view DIV_F32 = "DIV.F32";
    static constexpr std::string_view DIV_S = "DIV.S";
    static constexpr std::string_view DIV_U = "DIV.U";
    static constexpr std::string_view MAD_F32 = "MAD.F32";
    static constexpr std::string_view RSQ_F32 = "RSQ.F32";
    static constexpr std::string_view COS_F32 = "COS.F32";
    static constexpr std::string_view SIN_F32 = "SIN.F32";
    static constexpr std::string_view EX2_F32 = "EX2.F32";
    static constexpr std::string_view LG2_F32 = "LG2.F32";
    static constexpr std::string_view SLT_F = "SLT.F32";
    static constexpr std::string_view SLT_S = "SLT.S";
    static constexpr std::string_view SLT_U = "SLT.U";
    static constexpr std::string_view SEQ_F = "SEQ.F32";
    static constexpr std::string_view SEQ_S = "SEQ.S";
    static constexpr std::string_view SEQ_U = "SEQ.U";
    static constexpr std::string_view SLE_F = "SLE.F32";
    static constexpr std::string_view SLE_S = "SLE.S";
    static constexpr std::string_view SLE_U = "SLE.U";
    static constexpr std::string_view SGT_F = "SGT.F32";
    static constexpr std::string_view SGT_S = "SGT.S";
    static constexpr std::string_view SGT_U = "SGT.U";
    static constexpr std::string_view SNE_F = "SNE.F32";
    static constexpr std::string_view SNE_S = "SNE.S";
    static constexpr std::string_view SNE_U = "SNE.U";
    static constexpr std::string_view SGE_F = "SGE.F32";
    static constexpr std::string_view SGE_S = "SGE.S";
    static constexpr std::string_view SGE_U = "SGE.U";
    static constexpr std::string_view AND_S = "AND.S";
    static constexpr std::string_view AND_U = "AND.U";
    static constexpr std::string_view TRUNC_F = "TRUNC.F";
    static constexpr std::string_view TRUNC_S = "TRUNC.S";
    static constexpr std::string_view TRUNC_U = "TRUNC.U";
    static constexpr std::string_view SHL_S = "SHL.S";
    static constexpr std::string_view SHL_U = "SHL.U";
    static constexpr std::string_view SHR_S = "SHR.S";
    static constexpr std::string_view SHR_U = "SHR.U";
    static constexpr std::string_view OR_S = "OR.S";
    static constexpr std::string_view OR_U = "OR.U";
    static constexpr std::string_view XOR_S = "XOR.S";
    static constexpr std::string_view XOR_U = "XOR.U";
    static constexpr std::string_view NOT_S = "NOT.S";
    static constexpr std::string_view NOT_U = "NOT.U";
    static constexpr std::string_view BTC_S = "BTC.S";
    static constexpr std::string_view BTC_U = "BTC.U";
    static constexpr std::string_view BTFM_S = "BTFM.S";
    static constexpr std::string_view BTFM_U = "BTFM.U";
    static constexpr std::string_view ROUND_F = "ROUND.F";
    static constexpr std::string_view CEIL_F = "CEIL.F";
    static constexpr std::string_view FLR_F = "FLR.F";
    static constexpr std::string_view I2F_S = "I2F.S";
    static constexpr std::string_view I2F_U = "I2F.U";
    static constexpr std::string_view MIN_F = "MIN.F";
    static constexpr std::string_view MIN_S = "MIN.S";
    static constexpr std::string_view MIN_U = "MIN.U";
    static constexpr std::string_view MAX_F = "MAX.F";
    static constexpr std::string_view MAX_S = "MAX.S";
    static constexpr std::string_view MAX_U = "MAX.U";
    static constexpr std::string_view MOV_U = "MOV.U";
    static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
    static constexpr std::string_view TGALL_U = "TGALL.U";
    static constexpr std::string_view TGANY_U = "TGANY.U";
    static constexpr std::string_view TGEQ_U = "TGEQ.U";
    static constexpr std::string_view EXCH = "EXCH";
    static constexpr std::string_view ADD = "ADD";
    static constexpr std::string_view MIN = "MIN";
    static constexpr std::string_view MAX = "MAX";
    static constexpr std::string_view AND = "AND";
    static constexpr std::string_view OR = "OR";
    static constexpr std::string_view XOR = "XOR";
    static constexpr std::string_view U32 = "U32";
    static constexpr std::string_view S32 = "S32";

    static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
    using DecompilerType = std::string (ARBDecompiler::*)(Operation);
    static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
        &ARBDecompiler::Assign,

        &ARBDecompiler::Select,

        &ARBDecompiler::Binary<ADD_F32>,
        &ARBDecompiler::Binary<MUL_F32>,
        &ARBDecompiler::Binary<DIV_F32>,
        &ARBDecompiler::Trinary<MAD_F32>,
        &ARBDecompiler::Negate<'F'>,
        &ARBDecompiler::Absolute<'F'>,
        &ARBDecompiler::FClamp,
        &ARBDecompiler::FCastHalf0,
        &ARBDecompiler::FCastHalf1,
        &ARBDecompiler::Binary<MIN_F>,
        &ARBDecompiler::Binary<MAX_F>,
        &ARBDecompiler::Unary<COS_F32>,
        &ARBDecompiler::Unary<SIN_F32>,
        &ARBDecompiler::Unary<EX2_F32>,
        &ARBDecompiler::Unary<LG2_F32>,
        &ARBDecompiler::Unary<RSQ_F32>,
        &ARBDecompiler::FSqrt,
        &ARBDecompiler::Unary<ROUND_F>,
        &ARBDecompiler::Unary<FLR_F>,
        &ARBDecompiler::Unary<CEIL_F>,
        &ARBDecompiler::Unary<TRUNC_F>,
        &ARBDecompiler::Unary<I2F_S>,
        &ARBDecompiler::Unary<I2F_U>,
        &ARBDecompiler::FSwizzleAdd,

        &ARBDecompiler::Binary<ADD_S>,
        &ARBDecompiler::Binary<MUL_S>,
        &ARBDecompiler::Binary<DIV_S>,
        &ARBDecompiler::Negate<'S'>,
        &ARBDecompiler::Absolute<'S'>,
        &ARBDecompiler::Binary<MIN_S>,
        &ARBDecompiler::Binary<MAX_S>,

        &ARBDecompiler::Unary<TRUNC_S>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_S>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_S>,
        &ARBDecompiler::Binary<AND_S>,
        &ARBDecompiler::Binary<OR_S>,
        &ARBDecompiler::Binary<XOR_S>,
        &ARBDecompiler::Unary<NOT_S>,
        &ARBDecompiler::BitfieldInsert<'S'>,
        &ARBDecompiler::BitfieldExtract<'S'>,
        &ARBDecompiler::Unary<BTC_S>,
        &ARBDecompiler::Unary<BTFM_S>,

        &ARBDecompiler::Binary<ADD_U>,
        &ARBDecompiler::Binary<MUL_U>,
        &ARBDecompiler::Binary<DIV_U>,
        &ARBDecompiler::Binary<MIN_U>,
        &ARBDecompiler::Binary<MAX_U>,
        &ARBDecompiler::Unary<TRUNC_U>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::BitfieldInsert<'U'>,
        &ARBDecompiler::BitfieldExtract<'U'>,
        &ARBDecompiler::Unary<BTC_U>,
        &ARBDecompiler::Unary<BTFM_U>,

        &ARBDecompiler::HAdd2,
        &ARBDecompiler::HMul2,
        &ARBDecompiler::HFma2,
        &ARBDecompiler::HAbsolute,
        &ARBDecompiler::HNegate,
        &ARBDecompiler::HClamp,
        &ARBDecompiler::HCastFloat,
        &ARBDecompiler::HUnpack,
        &ARBDecompiler::HMergeF32,
        &ARBDecompiler::HMergeH0,
        &ARBDecompiler::HMergeH1,
        &ARBDecompiler::HPack2,

        &ARBDecompiler::LogicalAssign,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::LogicalPick2,
        &ARBDecompiler::LogicalAnd2,

        &ARBDecompiler::FloatComparison<SLT_F, false>,
        &ARBDecompiler::FloatComparison<SEQ_F, false>,
        &ARBDecompiler::FloatComparison<SLE_F, false>,
        &ARBDecompiler::FloatComparison<SGT_F, false>,
        &ARBDecompiler::FloatComparison<SNE_F, false>,
        &ARBDecompiler::FloatComparison<SGE_F, false>,
        &ARBDecompiler::FloatOrdered,
        &ARBDecompiler::FloatUnordered,
        &ARBDecompiler::FloatComparison<SLT_F, true>,
        &ARBDecompiler::FloatComparison<SEQ_F, true>,
        &ARBDecompiler::FloatComparison<SLE_F, true>,
        &ARBDecompiler::FloatComparison<SGT_F, true>,
        &ARBDecompiler::FloatComparison<SNE_F, true>,
        &ARBDecompiler::FloatComparison<SGE_F, true>,

        &ARBDecompiler::Binary<SLT_S>,
        &ARBDecompiler::Binary<SEQ_S>,
        &ARBDecompiler::Binary<SLE_S>,
        &ARBDecompiler::Binary<SGT_S>,
        &ARBDecompiler::Binary<SNE_S>,
        &ARBDecompiler::Binary<SGE_S>,

        &ARBDecompiler::Binary<SLT_U>,
        &ARBDecompiler::Binary<SEQ_U>,
        &ARBDecompiler::Binary<SLE_U>,
        &ARBDecompiler::Binary<SGT_U>,
        &ARBDecompiler::Binary<SNE_U>,
        &ARBDecompiler::Binary<SGE_U>,

        &ARBDecompiler::LogicalAddCarry,

        &ARBDecompiler::HalfComparison<SLT_F, false>,
        &ARBDecompiler::HalfComparison<SEQ_F, false>,
        &ARBDecompiler::HalfComparison<SLE_F, false>,
        &ARBDecompiler::HalfComparison<SGT_F, false>,
        &ARBDecompiler::HalfComparison<SNE_F, false>,
        &ARBDecompiler::HalfComparison<SGE_F, false>,
        &ARBDecompiler::HalfComparison<SLT_F, true>,
        &ARBDecompiler::HalfComparison<SEQ_F, true>,
        &ARBDecompiler::HalfComparison<SLE_F, true>,
        &ARBDecompiler::HalfComparison<SGT_F, true>,
        &ARBDecompiler::HalfComparison<SNE_F, true>,
        &ARBDecompiler::HalfComparison<SGE_F, true>,

        &ARBDecompiler::Texture,
        &ARBDecompiler::Texture,
        &ARBDecompiler::TextureGather,
        &ARBDecompiler::TextureQueryDimensions,
        &ARBDecompiler::TextureQueryLod,
        &ARBDecompiler::TexelFetch,
        &ARBDecompiler::TextureGradient,

        &ARBDecompiler::ImageLoad,
        &ARBDecompiler::ImageStore,

        &ARBDecompiler::AtomicImage<ADD, U32>,
        &ARBDecompiler::AtomicImage<AND, U32>,
        &ARBDecompiler::AtomicImage<OR, U32>,
        &ARBDecompiler::AtomicImage<XOR, U32>,
        &ARBDecompiler::AtomicImage<EXCH, U32>,

        &ARBDecompiler::Atomic<EXCH, U32>,
        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<EXCH, S32>,
        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        &ARBDecompiler::Branch,
        &ARBDecompiler::BranchIndirect,
        &ARBDecompiler::PushFlowStack,
        &ARBDecompiler::PopFlowStack,
        &ARBDecompiler::Exit,
        &ARBDecompiler::Discard,

        &ARBDecompiler::EmitVertex,
        &ARBDecompiler::EndPrimitive,

        &ARBDecompiler::InvocationId,
        &ARBDecompiler::YNegate,
        &ARBDecompiler::LocalInvocationId<'x'>,
        &ARBDecompiler::LocalInvocationId<'y'>,
        &ARBDecompiler::LocalInvocationId<'z'>,
        &ARBDecompiler::WorkGroupId<'x'>,
        &ARBDecompiler::WorkGroupId<'y'>,
        &ARBDecompiler::WorkGroupId<'z'>,

        &ARBDecompiler::Unary<TGBALLOT_U>,
        &ARBDecompiler::Unary<TGALL_U>,
        &ARBDecompiler::Unary<TGANY_U>,
        &ARBDecompiler::Unary<TGEQ_U>,

        &ARBDecompiler::ThreadId,
        &ARBDecompiler::ThreadMask<'e', 'q'>,
        &ARBDecompiler::ThreadMask<'g', 'e'>,
        &ARBDecompiler::ThreadMask<'g', 't'>,
        &ARBDecompiler::ThreadMask<'l', 'e'>,
        &ARBDecompiler::ThreadMask<'l', 't'>,
        &ARBDecompiler::ShuffleIndexed,

        &ARBDecompiler::Barrier,
        &ARBDecompiler::MemoryBarrierGroup,
        &ARBDecompiler::MemoryBarrierGlobal,
    };
};

ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier)
    : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
    DefineGlobalMemory();

    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    shader_source += code;
}

std::string_view HeaderStageName(ShaderType stage) {
    switch (stage) {
    case ShaderType::Vertex:
        return "vp";
    case ShaderType::Geometry:
        return "gp";
    case ShaderType::Fragment:
        return "fp";
    case ShaderType::Compute:
        return "cp";
    default:
        UNREACHABLE();
        return "";
    }
}

void ARBDecompiler::DefineGlobalMemory() {
    u32 binding = 0;
    for (const auto& pair : ir.GetGlobalMemory()) {
        const GlobalMemoryBase base = pair.first;
        global_memory_names.emplace(base, binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareHeader() {
    AddLine("!!NV{}5.0", HeaderStageName(stage));
    // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
    AddLine("OPTION NV_internal;");
    AddLine("OPTION NV_gpu_program_fp64;");
    AddLine("OPTION NV_shader_thread_group;");
    if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
        AddLine("OPTION NV_shader_thread_shuffle;");
    }
    if (stage == ShaderType::Vertex) {
        if (device.HasNvViewportArray2()) {
            AddLine("OPTION NV_viewport_array2;");
        }
    }
    if (stage == ShaderType::Fragment) {
        AddLine("OPTION ARB_draw_buffers;");
    }
    if (device.HasImageLoadFormatted()) {
        AddLine("OPTION EXT_shader_image_load_formatted;");
    }
}

void ARBDecompiler::DeclareVertex() {
    if (stage != ShaderType::Vertex) {
        return;
    }
    AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
}

void ARBDecompiler::DeclareGeometry() {
    if (stage != ShaderType::Geometry) {
        return;
    }
    const auto& info = registry.GetGraphicsInfo();
    const auto& header = ir.GetHeader();
    AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
    AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
    AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
    AddLine("ATTRIB vertex_position = vertex.position;");
}

void ARBDecompiler::DeclareFragment() {
    if (stage != ShaderType::Fragment) {
        return;
    }
    AddLine("OUTPUT result_color7 = result.color[7];");
    AddLine("OUTPUT result_color6 = result.color[6];");
    AddLine("OUTPUT result_color5 = result.color[5];");
    AddLine("OUTPUT result_color4 = result.color[4];");
    AddLine("OUTPUT result_color3 = result.color[3];");
    AddLine("OUTPUT result_color2 = result.color[2];");
    AddLine("OUTPUT result_color1 = result.color[1];");
    AddLine("OUTPUT result_color0 = result.color;");
}

void ARBDecompiler::DeclareCompute() {
    if (stage != ShaderType::Compute) {
        return;
    }
    const ComputeInfo& info = registry.GetComputeInfo();
    AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
            info.workgroup_size[2]);
    if (info.shared_memory_size_in_words == 0) {
        return;
    }
    const u32 limit = device.GetMaxComputeSharedMemorySize();
    u32 size_in_bytes = info.shared_memory_size_in_words * 4;
    if (size_in_bytes > limit) {
        LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
                  size_in_bytes, limit);
        size_in_bytes = limit;
    }

    AddLine("SHARED_MEMORY {};", size_in_bytes);
    AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}

void ARBDecompiler::DeclareInputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    const std::string_view stage_name = StageInputName(stage);
    for (const auto attribute : ir.GetInputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);

        std::string_view suffix;
        if (stage == ShaderType::Fragment) {
            const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
            if (input_mode == PixelImap::Unused) {
                return;
            }
            suffix = GetInputFlags(input_mode);
        }
        AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
                index);
    }
}

void ARBDecompiler::DeclareOutputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
    }
}

void ARBDecompiler::DeclareLocalMemory() {
    u64 size = 0;
    if (stage == ShaderType::Compute) {
        size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
    } else {
        size = ir.GetHeader().GetLocalMemorySize();
    }
    if (size == 0) {
        return;
    }
    const u64 element_count = Common::AlignUp(size, 4) / 4;
    AddLine("TEMP lmem[{}];", element_count);
}

void ARBDecompiler::DeclareGlobalMemory() {
    const size_t num_entries = ir.GetGlobalMemory().size();
    if (num_entries > 0) {
        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
    }
}

void ARBDecompiler::DeclareConstantBuffers() {
    u32 binding = 0;
    for (const auto& cbuf : ir.GetConstantBuffers()) {
        AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareRegisters() {
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("TEMP R{};", gpr);
    }
}

void ARBDecompiler::DeclareTemporaries() {
    for (std::size_t i = 0; i < max_temporaries; ++i) {
        AddLine("TEMP T{};", i);
    }
    for (std::size_t i = 0; i < max_long_temporaries; ++i) {
        AddLine("LONG TEMP L{};", i);
    }
}

void ARBDecompiler::DeclarePredicates() {
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("TEMP P{};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DeclareInternalFlags() {
    for (const char* name : INTERNAL_FLAG_NAMES) {
        AddLine("TEMP {};", name);
    }
}

void ARBDecompiler::InitializeVariables() {
    AddLine("MOV.F32 FSWZA[0], -1;");
    AddLine("MOV.F32 FSWZA[1], 1;");
    AddLine("MOV.F32 FSWZA[2], -1;");
    AddLine("MOV.F32 FSWZA[3], 0;");
    AddLine("MOV.F32 FSWZB[0], -1;");
    AddLine("MOV.F32 FSWZB[1], -1;");
    AddLine("MOV.F32 FSWZB[2], 1;");
    AddLine("MOV.F32 FSWZB[3], -1;");

    if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
        AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
    }
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
    }
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DecompileAST() {
    const u32 num_flow_variables = ir.GetASTNumVariables();
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("TEMP F{};", i);
    }
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
    }

    InitializeVariables();

    VisitAST(ir.GetASTProgram());
}

void ARBDecompiler::DecompileBranchMode() {
    static constexpr u32 FLOW_STACK_SIZE = 20;
    if (!ir.IsFlowStackDisabled()) {
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        AddLine("ELSE;");
    }
    AddLine("RET;");
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}

void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(if_then->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        AddLine("ELSE;");
        for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(decoded->nodes);
    } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
        ResetTemporaries();
    } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(do_while->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        const bool is_true = ExprIsTrue(ast_return->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast_return->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast_break->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}

std::string ARBDecompiler::VisitExpression(const Expr& node) {
    if (const auto expr = std::get_if<ExprAnd>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprOr>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprNot>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
        return result;
    }
    if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
        return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
    }
    if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
        return Visit(ir.GetConditionCode(expr->cc));
    }
    if (const auto expr = std::get_if<ExprVar>(&*node)) {
        return fmt::format("F{}.x", expr->var_index);
    }
    if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
        return expr->value ? "0xffffffff" : "0";
    }
    if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
        return result;
    }
    UNREACHABLE();
    return "0";
}

void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
    for (const auto& node : bb) {
        Visit(node);
    }
}

std::string ARBDecompiler::Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
            break;
        }
        return "{0, 0, 0, 0}.x";
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV {}, 0;", temporary);
        AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
        return temporary;
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}

std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    UNIMPLEMENTED_IF(meta.sampler.is_indexed);

    const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
                             meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
    const std::size_t count = operation.GetOperandsCount();
    std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    if (meta.sampler.is_array) {
        AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
        ++i;
    }
    if (meta.sampler.is_shadow) {
        std::string compare = Visit(meta.depth_compare);
        if (is_extended) {
            ASSERT(i == 4);
            std::string extra_coord = AllocVectorTemporary();
            AddLine("MOV.F {}.x, {};", extra_coord, compare);
            return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
        }
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
        ++i;
    }
    return {temporary, temporary, i};
}

std::string ARBDecompiler::BuildAoffi(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    if (meta.aoffi.empty()) {
        return {};
    }
    const std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (auto& node : meta.aoffi) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
    }
    return fmt::format(", offset({})", temporary);
}

std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
    // Read a bindless SSBO, return its address and set CC accordingly
    // address = c[binding].xy
    // length  = c[binding].z
    const u32 binding = global_memory_names.at(gmem.GetDescriptor());

    const std::string pointer = AllocLongVectorTemporary();
    std::string temporary = AllocTemporary();

    AddLine("PK64.U {}, c[{}];", pointer, binding);
    AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
            Visit(gmem.GetBaseAddress()));
    AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
    AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
    // Compare offset to length and set CC
    AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
    return fmt::format("{}.x", pointer);
}

void ARBDecompiler::Exit() {
    if (stage != ShaderType::Fragment) {
        AddLine("RET;");
        return;
    }

    const auto safe_get_register = [this](u32 reg) -> std::string {
        if (ir.GetRegisters().contains(reg)) {
            return fmt::format("R{}.x", reg);
        }
        return "{0, 0, 0, 0}.x";
    };

    const auto& header = ir.GetHeader();
    u32 current_reg = 0;
    for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
        for (u32 component = 0; component < 4; ++component) {
            if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
                continue;
            }
            AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
                    safe_get_register(current_reg));
            ++current_reg;
        }
    }
    if (header.ps.omap.depth) {
        AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
    }

    AddLine("RET;");
}

std::string ARBDecompiler::Assign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string dest_name;
    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            // Writing to Register::ZeroIndex is a no op
            return {};
        }
        dest_name = fmt::format("R{}.x", gpr->GetIndex());
    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (const Attribute::Index index = abuf->GetIndex()) {
        case Attribute::Index::Position:
            dest_name = fmt::format("result.position.{}", swizzle);
            break;
        case Attribute::Index::LayerViewportPointSize:
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return {};
            case 1:
            case 2:
                if (!device.HasNvViewportArray2()) {
                    LOG_ERROR(
                        Render_OpenGL,
                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
                    return {};
                }
                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
                break;
            case 3:
                dest_name = "result.pointsize.x";
                break;
            }
            break;
        case Attribute::Index::ClipDistances0123:
            dest_name = fmt::format("result.clip[{}].x", element);
            break;
        case Attribute::Index::ClipDistances4567:
            dest_name = fmt::format("result.clip[{}].x", element + 4);
            break;
        default:
            if (!IsGenericAttribute(index)) {
                UNREACHABLE();
                return {};
            }
            dest_name =
                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
            break;
        }
    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
        const std::string address = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", address, address);
        dest_name = fmt::format("lmem[{}].x", address);
    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
        ResetTemporaries();
        return {};
    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
        AddLine("IF NE.x;");
        AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
        AddLine("ENDIF;");
        ResetTemporaries();
        return {};
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", dest_name, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::Select(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
            Visit(operation[2]));
    return temporary;
}

std::string ARBDecompiler::FClamp(Operation operation) {
    // 1.0f in hex, replace with std::bit_cast on C++20
    static constexpr u32 POSITIVE_ONE = 0x3f800000;

    std::string temporary = AllocTemporary();
    const Node& value = operation[0];
    const Node& low = operation[1];
    const Node& high = operation[2];
    const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
    const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
    if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
        AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
    } else {
        AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
        AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
    }
    return temporary;
}

std::string ARBDecompiler::FCastHalf0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FCastHalf1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
    AddLine("MOV {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FSqrt(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
    AddLine("RCP.F32 {}, {};", temporary, temporary);
    return temporary;
}

std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
        return fmt::format("{}.x", temporary);
    }

    AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
    AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
    AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
    AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
    AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
    AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
    AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HAdd2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HMul2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HFma2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    const std::string tmp3 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
    AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HAbsolute(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HNegate(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
    AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
    AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
    AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HClamp(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HCastFloat(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
    AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HUnpack(Operation operation) {
    std::string operand = Visit(operation[0]);
    switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
    case Tegra::Shader::HalfType::H0_H1:
        return operand;
    case Tegra::Shader::HalfType::F32: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.U {}.x, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H0_H0: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H1_H1: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    }
    UNREACHABLE();
    return "{0, 0, 0, 0}.x";
}

std::string ARBDecompiler::HMergeF32(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HPack2(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
    AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::LogicalAssign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string target;

    if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
        ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");

        const Tegra::Shader::Pred index = pred->GetIndex();
        switch (index) {
        case Tegra::Shader::Pred::NeverExecute:
        case Tegra::Shader::Pred::UnusedIndex:
            // Writing to these predicates is a no-op
            return {};
        }
        target = fmt::format("P{}.x", static_cast<u64>(index));
    } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", target, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::LogicalPick2(Operation operation) {
    std::string temporary = AllocTemporary();
    const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
    AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
    return temporary;
}

std::string ARBDecompiler::LogicalAnd2(Operation operation) {
    std::string temporary = AllocTemporary();
    const std::string op = Visit(operation[0]);
    AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
    return temporary;
}

std::string ARBDecompiler::FloatOrdered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("MOV.S {} (NAN.x), 0;", temporary);
    AddLine("MOV.S {} (NAN.y), 0;", temporary);
    return temporary;
}

std::string ARBDecompiler::FloatUnordered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("MOV.S {} (NAN.x), -1;", temporary);
    AddLine("MOV.S {} (NAN.y), -1;", temporary);
    return temporary;
}

std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("IF CF.x;");
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("ENDIF;");
    return temporary;
}

std::string ARBDecompiler::Texture(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    std::string_view opcode = "TEX";
    std::string extra;
    if (meta.bias) {
        ASSERT(!meta.lod);
        opcode = "TXB";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
        } else {
            const std::string bias = AllocTemporary();
            AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
            extra = fmt::format(" {},", bias);
        }
    }
    if (meta.lod) {
        ASSERT(!meta.bias);
        opcode = "TXL";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
        } else {
            const std::string lod = AllocTemporary();
            AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
            extra = fmt::format(" {},", lod);
        }
    }

    AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureGather(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    std::string comp;
    if (!meta.sampler.is_shadow) {
        const auto& immediate = std::get<ImmediateNode>(*meta.component);
        comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
    }

    AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
    AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryLod(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::size_t count = operation.GetOperandsCount();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
    AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
    AddLine("TRUNC.S {}, {};", temporary, temporary);
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TexelFetch(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    if (!meta.sampler.is_buffer) {
        ASSERT(swizzle < 4);
        AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
    }
    AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
            BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureGradient(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const std::string ddx = AllocVectorTemporary();
    const std::string ddy = AllocVectorTemporary();
    const std::string coord = std::get<1>(BuildCoords(operation));

    const std::size_t num_components = meta.derivates.size() / 2;
    for (std::size_t index = 0; index < num_components; ++index) {
        const char swizzle = Swizzle(index);
        AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
        AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
    }

    const std::string_view result = coord;
    AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
    return fmt::format("{}.x", result);
}

std::string ARBDecompiler::ImageLoad(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t count = operation.GetOperandsCount();
    const std::string_view type = ImageType(meta.image.type);

    const std::string temporary = AllocVectorTemporary();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
    AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::ImageStore(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t num_coords = operation.GetOperandsCount();
    const std::size_t num_values = meta.values.size();
    const std::string_view type = ImageType(meta.image.type);

    const std::string coord = AllocVectorTemporary();
    const std::string value = AllocVectorTemporary();
    for (std::size_t i = 0; i < num_coords; ++i) {
        AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
    }
    for (std::size_t i = 0; i < num_values; ++i) {
        AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
    }
    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
    return {};
}

std::string ARBDecompiler::Branch(Operation operation) {
    const auto target = std::get<ImmediateNode>(*operation[0]);
    AddLine("MOV.U PC.x, {};", target.GetValue());
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::BranchIndirect(Operation operation) {
    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::PushFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
    const std::string_view stack_name = StackName(stack);
    AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
    AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    return {};
}

std::string ARBDecompiler::PopFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const std::string_view stack_name = StackName(stack);
    AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::Exit(Operation) {
    Exit();
    return {};
}

std::string ARBDecompiler::Discard(Operation) {
    AddLine("KIL TR;");
    return {};
}

std::string ARBDecompiler::EmitVertex(Operation) {
    AddLine("EMIT;");
    return {};
}

std::string ARBDecompiler::EndPrimitive(Operation) {
    AddLine("ENDPRIM;");
    return {};
}

std::string ARBDecompiler::InvocationId(Operation) {
    return "primitive.invocation";
}

std::string ARBDecompiler::YNegate(Operation) {
    LOG_WARNING(Render_OpenGL, "(STUBBED)");
    std::string temporary = AllocTemporary();
    AddLine("MOV.F {}, 1;", temporary);
    return temporary;
}

std::string ARBDecompiler::ThreadId(Operation) {
    return fmt::format("{}.threadid", StageInputName(stage));
}

std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        return Visit(operation[0]);
    }
    const std::string temporary = AllocVectorTemporary();
    AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
            Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::Barrier(Operation) {
    AddLine("BAR;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
    AddLine("MEMBAR.CTA;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
    AddLine("MEMBAR;");
    return {};
}

} // Anonymous namespace

std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
    return ARBDecompiler(device, ir, registry, stage, identifier).Code();
}

} // namespace OpenGL