summaryrefslogblamecommitdiffstats
path: root/src/shader_recompiler/backend/glsl/emit_context.cpp
blob: 50a7a7447ab51d79f1aadc2ee1dc4d06f941d296 (plain) (tree)
1
2
3
4
5
6
7
8
9
10






                                                        
                                      

                                 
           
                              


                            
                                 


                                     




                                                        
                       
                                      
                                


                                                              
 
                                                   

                         







                                    




                                             






                                 



                                                    




                  















                                                               


                                                                    
                   



                                   





                                   

                                



                                     




                                                                































                                                                  























                                                                          


























                                                                  
 

                                                               

               
                                                   
                                     

                                        
                                        

                                             
                                                                                                  
                                       

                                          
                   
                                                                         

                                              
 
                        
 


                                                                                           
                            



                            
                          

                                    

                                                                              
              
                                 

                                                                                              

                                                                          

                         
                          
                                                                            

                                                                                                 

                         
                          

                        
                          
                                                                                            

                                                                                   
              
     
                                     


                                                                         
                                                                                     







                                                                                

                                                                                                    




                                                                            
                                                                                         

                                                                          
                                                  
                                                                                                
                                                            

         

                                    
                          
                            
                            

 
                                                 
                             
                                                            
                                                                         


                                                                    


                                                                  








                                                                                                 
                         





                                                                           
     

                                                                                                 

                                                                   


                                                                      
     
                                                                                          
                                   
                                                                             
     

 
                                                             



                                                               

                                                                                   

                                                                                               


     
                                                            


                                                   
                
                                                               


                                                                                                  
                                              
                            


     




































                                                                                                    

 
                                           

                                                                             
                                                                   
                                                                                            
     
                                                                   
                                                     
                                                                     
     
                                   
                                                                  
                                            

                                     
                                                                     
                                                              

                                     
                                                                     
                                                                   

                                     
                                                                     
                                                                   
     
                                     
                                                                        
                                                                

                                     
                                                                        
                                                                     

                                     
                                                                        
                                                                     
     
                                   
                                                                                                

                                   
                                                                                                
     
                                  


                                                
 
                                                        
                                                                                                    














                                                                                                    
 




                                                                                          
 
                                                                          
                                                                    
       





                                                                                 




                                                                      











                                                                                               







                                                                                                  

 


                                                                        
                                                        
                                                        
                                                          
                                                                    

                                                                                     
         



                                                          
                                                 







                                                                                                    

                                     


                                                     

                                                                            
                                                            





                                                                                           



                                                              
                                                                       


                                                                      

                                                                                           




                                       
                                    
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glsl/emit_context.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"

namespace Shader::Backend::GLSL {
namespace {
u32 CbufIndex(size_t offset) {
    return (offset / 4) % 4;
}

char CbufSwizzle(size_t offset) {
    return "xyzw"[CbufIndex(offset)];
}

std::string_view InterpDecorator(Interpolation interp) {
    switch (interp) {
    case Interpolation::Smooth:
        return "";
    case Interpolation::Flat:
        return "flat ";
    case Interpolation::NoPerspective:
        return "noperspective ";
    }
    throw InvalidArgument("Invalid interpolation {}", interp);
}

std::string_view InputArrayDecorator(Stage stage) {
    switch (stage) {
    case Stage::Geometry:
    case Stage::TessellationControl:
    case Stage::TessellationEval:
        return "[]";
    default:
        return "";
    }
}

bool StoresPerVertexAttributes(Stage stage) {
    switch (stage) {
    case Stage::VertexA:
    case Stage::VertexB:
    case Stage::Geometry:
    case Stage::TessellationEval:
        return true;
    default:
        return false;
    }
}

std::string OutputDecorator(Stage stage, u32 size) {
    switch (stage) {
    case Stage::TessellationControl:
        return fmt::format("[{}]", size);
    default:
        return "";
    }
}

std::string_view SamplerType(TextureType type, bool is_depth) {
    if (is_depth) {
        switch (type) {
        case TextureType::Color1D:
            return "sampler1DShadow";
        case TextureType::ColorArray1D:
            return "sampler1DArrayShadow";
        case TextureType::Color2D:
            return "sampler2DShadow";
        case TextureType::ColorArray2D:
            return "sampler2DArrayShadow";
        case TextureType::ColorCube:
            return "samplerCubeShadow";
        case TextureType::ColorArrayCube:
            return "samplerCubeArrayShadow";
        default:
            throw NotImplementedException("Texture type: {}", type);
        }
    }
    switch (type) {
    case TextureType::Color1D:
        return "sampler1D";
    case TextureType::ColorArray1D:
        return "sampler1DArray";
    case TextureType::Color2D:
        return "sampler2D";
    case TextureType::ColorArray2D:
        return "sampler2DArray";
    case TextureType::Color3D:
        return "sampler3D";
    case TextureType::ColorCube:
        return "samplerCube";
    case TextureType::ColorArrayCube:
        return "samplerCubeArray";
    case TextureType::Buffer:
        return "samplerBuffer";
    default:
        throw NotImplementedException("Texture type: {}", type);
    }
}

std::string_view ImageType(TextureType type) {
    switch (type) {
    case TextureType::Color2D:
        return "uimage2D";
    default:
        throw NotImplementedException("Image type: {}", type);
    }
}

std::string_view ImageFormatString(ImageFormat format) {
    switch (format) {
    case ImageFormat::Typeless:
        return "";
    case ImageFormat::R8_UINT:
        return ",r8ui";
    case ImageFormat::R8_SINT:
        return ",r8i";
    case ImageFormat::R16_UINT:
        return ",r16ui";
    case ImageFormat::R16_SINT:
        return ",r16i";
    case ImageFormat::R32_UINT:
        return ",r32ui";
    case ImageFormat::R32G32_UINT:
        return ",rg32ui";
    case ImageFormat::R32G32B32A32_UINT:
        return ",rgba32ui";
    default:
        throw NotImplementedException("Image format: {}", format);
    }
}

std::string_view GetTessMode(TessPrimitive primitive) {
    switch (primitive) {
    case TessPrimitive::Triangles:
        return "triangles";
    case TessPrimitive::Quads:
        return "quads";
    case TessPrimitive::Isolines:
        return "isolines";
    }
    throw InvalidArgument("Invalid tessellation primitive {}", primitive);
}

std::string_view GetTessSpacing(TessSpacing spacing) {
    switch (spacing) {
    case TessSpacing::Equal:
        return "equal_spacing";
    case TessSpacing::FractionalOdd:
        return "fractional_odd_spacing";
    case TessSpacing::FractionalEven:
        return "fractional_even_spacing";
    }
    throw InvalidArgument("Invalid tessellation spacing {}", spacing);
}

std::string_view InputPrimitive(InputTopology topology) {
    switch (topology) {
    case InputTopology::Points:
        return "points";
    case InputTopology::Lines:
        return "lines";
    case InputTopology::LinesAdjacency:
        return "lines_adjacency";
    case InputTopology::Triangles:
        return "triangles";
    case InputTopology::TrianglesAdjacency:
        return "triangles_adjacency";
    }
    throw InvalidArgument("Invalid input topology {}", topology);
}

std::string_view OutputPrimitive(OutputTopology topology) {
    switch (topology) {
    case OutputTopology::PointList:
        return "points";
    case OutputTopology::LineStrip:
        return "line_strip";
    case OutputTopology::TriangleStrip:
        return "triangle_strip";
    }
    throw InvalidArgument("Invalid output topology {}", topology);
}

void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
    if (!StoresPerVertexAttributes(ctx.stage)) {
        return;
    }
    header += "out gl_PerVertex{vec4 gl_Position;";
    if (ctx.info.stores_point_size) {
        header += "float gl_PointSize;";
    }
    if (ctx.info.stores_clip_distance) {
        header += "float gl_ClipDistance[];";
    }
    if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry &&
        ctx.stage != Stage::Geometry) {
        header += "int gl_ViewportIndex;";
    }
    header += "};";
    if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) {
        header += "out int gl_ViewportIndex;";
    }
}
} // Anonymous namespace

EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
                         const RuntimeInfo& runtime_info_)
    : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
    SetupExtensions(header);
    stage = program.stage;
    switch (program.stage) {
    case Stage::VertexA:
    case Stage::VertexB:
        stage_name = "vs";
        break;
    case Stage::TessellationControl:
        stage_name = "tcs";
        header += fmt::format("layout(vertices={})out;", program.invocations);
        break;
    case Stage::TessellationEval:
        stage_name = "tes";
        header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
                              GetTessSpacing(runtime_info.tess_spacing),
                              runtime_info.tess_clockwise ? "cw" : "ccw");
        break;
    case Stage::Geometry:
        stage_name = "gs";
        header += fmt::format("layout({})in;layout({},max_vertices={})out;",
                              InputPrimitive(runtime_info.input_topology),
                              OutputPrimitive(program.output_topology), program.output_vertices);
        break;
    case Stage::Fragment:
        stage_name = "fs";
        break;
    case Stage::Compute:
        stage_name = "cs";
        header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
                              program.workgroup_size[0], program.workgroup_size[1],
                              program.workgroup_size[2]);
        break;
    }
    SetupOutPerVertex(*this, header);
    for (size_t index = 0; index < info.input_generics.size(); ++index) {
        const auto& generic{info.input_generics[index]};
        if (generic.used) {
            header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index,
                                  InterpDecorator(generic.interpolation), index,
                                  InputArrayDecorator(stage));
        }
    }
    for (size_t index = 0; index < info.uses_patches.size(); ++index) {
        if (!info.uses_patches[index]) {
            continue;
        }
        const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"};
        header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index);
    }
    for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
        if (!info.stores_frag_color[index]) {
            continue;
        }
        header += fmt::format("layout(location={})out vec4 frag_color{};", index, index);
    }
    for (size_t index = 0; index < info.stores_generics.size(); ++index) {
        // TODO: Properly resolve attribute issues
        if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) {
            DefineGenericOutput(index, program.invocations);
        }
    }
    DefineConstantBuffers(bindings);
    DefineStorageBuffers(bindings);
    SetupImages(bindings);
    SetupTextures(bindings);
    DefineHelperFunctions();
}

void EmitContext::SetupExtensions(std::string&) {
    // TODO: track this usage
    header += "#extension GL_ARB_sparse_texture2 : enable\n"
              "#extension GL_EXT_shader_image_load_formatted : enable\n";
    if (profile.support_gl_texture_shadow_lod) {
        header += "#extension GL_EXT_texture_shadow_lod : enable\n";
    }
    if (info.uses_int64) {
        header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
    }
    if (info.uses_int64_bit_atomics) {
        header += "#extension GL_NV_shader_atomic_int64 : enable\n";
    }
    if (info.uses_atomic_f32_add) {
        header += "#extension GL_NV_shader_atomic_float : enable\n";
    }
    if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
        header += "#extension NV_shader_atomic_fp16_vector : enable\n";
    }
    if (info.uses_fp16) {
        if (profile.support_gl_nv_gpu_shader_5) {
            header += "#extension GL_NV_gpu_shader5 : enable\n";
        }
        if (profile.support_gl_amd_gpu_shader_half_float) {
            header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
        }
    }
    if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
        info.uses_subgroup_shuffles || info.uses_fswzadd) {
        header += "#extension GL_ARB_shader_ballot : enable\n"
                  "#extension GL_ARB_shader_group_vote : enable\n";
        if (!info.uses_int64) {
            header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
        }
    }
    if (info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry &&
        stage != Stage::Geometry) {
        header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
    }
}

void EmitContext::DefineConstantBuffers(Bindings& bindings) {
    if (info.constant_buffer_descriptors.empty()) {
        return;
    }
    for (const auto& desc : info.constant_buffer_descriptors) {
        header += fmt::format(
            "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
            bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
        bindings.uniform_buffer += desc.count;
    }
}

void EmitContext::DefineStorageBuffers(Bindings& bindings) {
    if (info.storage_buffers_descriptors.empty()) {
        return;
    }
    u32 index{};
    for (const auto& desc : info.storage_buffers_descriptors) {
        header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};",
                              bindings.storage_buffer, stage_name, bindings.storage_buffer,
                              stage_name, index);
        bindings.storage_buffer += desc.count;
        index += desc.count;
    }
}

void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
    static constexpr std::string_view swizzle{"xyzw"};
    const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
    u32 element{0};
    while (element < 4) {
        std::string definition{fmt::format("layout(location={}", index)};
        const u32 remainder{4 - element};
        const TransformFeedbackVarying* xfb_varying{};
        if (!runtime_info.xfb_varyings.empty()) {
            xfb_varying = &runtime_info.xfb_varyings[base_index + element];
            xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
        }
        const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
        if (element > 0) {
            definition += fmt::format(",component={}", element);
        }
        if (xfb_varying) {
            definition +=
                fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
                            xfb_varying->stride, xfb_varying->offset);
        }
        std::string name{fmt::format("out_attr{}", index)};
        if (num_components < 4 || element > 0) {
            name += fmt::format("_{}", swizzle.substr(element, num_components));
        }
        const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
        definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
        header += definition;

        const GenericElementInfo element_info{
            .name = name,
            .first_element = element,
            .num_components = num_components,
        };
        std::fill_n(output_generics[index].begin() + element, num_components, element_info);
        element += num_components;
    }
}

void EmitContext::DefineHelperFunctions() {
    header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
              "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
    if (info.uses_global_increment || info.uses_shared_increment) {
        header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
    }
    if (info.uses_global_decrement || info.uses_shared_decrement) {
        header += "uint CasDecrement(uint op_a,uint "
                  "op_b){return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
    }
    if (info.uses_atomic_f32_add) {
        header += "uint CasFloatAdd(uint op_a,float op_b){return "
                  "ftou(utof(op_a)+op_b);}";
    }
    if (info.uses_atomic_f32x2_add) {
        header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return "
                  "packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
    }
    if (info.uses_atomic_f32x2_min) {
        header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
                  "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
    }
    if (info.uses_atomic_f32x2_max) {
        header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
                  "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
    }
    if (info.uses_atomic_f16x2_add) {
        header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
                  "packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
    }
    if (info.uses_atomic_f16x2_min) {
        header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
                  "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
    }
    if (info.uses_atomic_f16x2_max) {
        header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
                  "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
    }
    if (info.uses_atomic_s32_min) {
        header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
    }
    if (info.uses_atomic_s32_max) {
        header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
    }
    if (info.uses_global_memory) {
        header += DefineGlobalMemoryFunctions();
    }
}

std::string EmitContext::DefineGlobalMemoryFunctions() {
    const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
        const auto& ssbo{info.storage_buffers_descriptors[index]};
        const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
        const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
        const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
        std::array<std::string, 2> addr_xy;
        std::array<std::string, 2> size_xy;
        for (size_t i = 0; i < addr_xy.size(); ++i) {
            const auto addr_loc{ssbo.cbuf_offset + 4 * i};
            const auto size_loc{size_cbuf_offset + 4 * i};
            addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, CbufSwizzle(addr_loc));
            size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, CbufSwizzle(size_loc));
        }
        const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
        const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
        func += addr_statment;

        const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
        const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
        const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
        const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
        func += comparison;

        const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
        func += fmt::format(return_statement, ssbo_name, ssbo_addr);
    }};
    std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
    std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
    std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
    std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
    std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
    std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
    const size_t num_buffers{info.storage_buffers_descriptors.size()};
    for (size_t index = 0; index < num_buffers; ++index) {
        if (!info.nvn_buffer_used[index]) {
            continue;
        }
        define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
        define_body(write_func_64, index,
                    "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
        define_body(write_func_128, index,
                    "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
                    "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
        define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
        define_body(load_func_64, index,
                    "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
        define_body(load_func_128, index,
                    "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
                    "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
    }
    write_func += "}";
    write_func_64 += "}";
    write_func_128 += "}";
    load_func += "return 0u;}";
    load_func_64 += "return uvec2(0);}";
    load_func_128 += "return uvec4(0);}";
    return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
}

void EmitContext::SetupImages(Bindings& bindings) {
    image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
    for (const auto& desc : info.image_buffer_descriptors) {
        image_buffer_bindings.push_back(bindings.image);
        const auto indices{bindings.image + desc.count};
        const auto format{ImageFormatString(desc.format)};
        for (u32 index = bindings.image; index < indices; ++index) {
            header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{};",
                                  bindings.image, format, index);
        }
        bindings.image += desc.count;
    }
    image_bindings.reserve(info.image_descriptors.size());
    for (const auto& desc : info.image_descriptors) {
        image_bindings.push_back(bindings.image);
        const auto format{ImageFormatString(desc.format)};
        const auto image_type{ImageType(desc.type)};
        const auto qualifier{desc.is_written ? "" : "readonly "};
        const auto indices{bindings.image + desc.count};
        for (u32 index = bindings.image; index < indices; ++index) {
            header += fmt::format("layout(binding={}{})uniform {}{} img{};", bindings.image, format,
                                  qualifier, image_type, index);
        }
        bindings.image += desc.count;
    }
}

void EmitContext::SetupTextures(Bindings& bindings) {
    texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
    for (const auto& desc : info.texture_buffer_descriptors) {
        texture_buffer_bindings.push_back(bindings.texture);
        const auto sampler_type{SamplerType(TextureType::Buffer, false)};
        const auto indices{bindings.texture + desc.count};
        for (u32 index = bindings.texture; index < indices; ++index) {
            header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture,
                                  sampler_type, index);
        }
        bindings.texture += desc.count;
    }
    texture_bindings.reserve(info.texture_descriptors.size());
    for (const auto& desc : info.texture_descriptors) {
        const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
        texture_bindings.push_back(bindings.texture);
        const auto indices{bindings.texture + desc.count};
        for (u32 index = bindings.texture; index < indices; ++index) {
            header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture,
                                  sampler_type, index);
        }
        bindings.texture += desc.count;
    }
}

} // namespace Shader::Backend::GLSL